This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/main by this push:
new 6560287 Ensure archive members limit can be disabled, and catch more widely
6560287 is described below
commit 6560287bb3881d2d44103e92b774b7a989ee4634
Author: Sean B. Palmer <[email protected]>
AuthorDate: Wed Jan 28 15:50:56 2026 +0000
Ensure archive members limit can be disabled, and catch more widely
---
atr/tarzip.py | 23 +++--
atr/tasks/checks/license.py | 116 +++++++++++++----------
atr/tasks/checks/targz.py | 4 +
tests/unit/test_archive_member_limit.py | 160 ++++++++++++++++++++++++++++++++
4 files changed, 243 insertions(+), 60 deletions(-)
diff --git a/atr/tarzip.py b/atr/tarzip.py
index 7ad6a66..20e9919 100644
--- a/atr/tarzip.py
+++ b/atr/tarzip.py
@@ -111,24 +111,27 @@ class ArchiveContext[ArchiveT: (tarfile.TarFile, zipfile.ZipFile)]:
def __iter__(self) -> Iterator[TarMember | ZipMember]:
count = 0
+ enforce_limit = self._max_members > 0
match self._archive_obj:
case tarfile.TarFile() as tf:
for member_orig in tf:
if member_orig.isdev():
continue
- count += 1
- if count > self._max_members:
- raise ArchiveMemberLimitExceededError(
-                        f"Archive contains too many members: exceeded limit of {self._max_members}"
- )
+ if enforce_limit:
+ count += 1
+ if count > self._max_members:
+ raise ArchiveMemberLimitExceededError(
+                            f"Archive contains too many members: exceeded limit of {self._max_members}"
+ )
yield TarMember(member_orig)
case zipfile.ZipFile() as zf:
for member_orig in zf.infolist():
- count += 1
- if count > self._max_members:
- raise ArchiveMemberLimitExceededError(
-                        f"Archive contains too many members: exceeded limit of {self._max_members}"
- )
+ if enforce_limit:
+ count += 1
+ if count > self._max_members:
+ raise ArchiveMemberLimitExceededError(
+                            f"Archive contains too many members: exceeded limit of {self._max_members}"
+ )
yield ZipMember(member_orig)
def extractfile(self, member_wrapper: Member) -> IO[bytes] | None:
diff --git a/atr/tasks/checks/license.py b/atr/tasks/checks/license.py
index 0e1a0c9..d9482f7 100644
--- a/atr/tasks/checks/license.py
+++ b/atr/tasks/checks/license.py
@@ -221,27 +221,35 @@ def _files_check_core_logic(artifact_path: str, is_podling: bool) -> Iterator[Re
disclaimer_found = False
# Check for license files in the root directory
- with tarzip.open_archive(artifact_path) as archive:
- for member in archive:
- if member.name and member.name.split("/")[-1].startswith("._"):
- # Metadata convention
- continue
-
- if member.name.count("/") > 1:
- # Skip files in subdirectories
- continue
-
- filename = os.path.basename(member.name)
- if filename == "LICENSE":
- # TODO: Check length, should be 11,358 bytes
- license_diff = _files_check_core_logic_license(archive, member)
- license_results[filename] = license_diff
- elif filename == "NOTICE":
- # TODO: Check length doesn't exceed some preset
-            notice_ok, notice_issues, notice_preamble = _files_check_core_logic_notice(archive, member)
-            notice_results[filename] = (notice_ok, notice_issues, notice_preamble)
- elif filename in {"DISCLAIMER", "DISCLAIMER-WIP"}:
- disclaimer_found = True
+ try:
+ with tarzip.open_archive(artifact_path) as archive:
+ for member in archive:
+ if member.name and member.name.split("/")[-1].startswith("._"):
+ # Metadata convention
+ continue
+
+ if member.name.count("/") > 1:
+ # Skip files in subdirectories
+ continue
+
+ filename = os.path.basename(member.name)
+ if filename == "LICENSE":
+ # TODO: Check length, should be 11,358 bytes
+                    license_diff = _files_check_core_logic_license(archive, member)
+ license_results[filename] = license_diff
+ elif filename == "NOTICE":
+ # TODO: Check length doesn't exceed some preset
+                    notice_ok, notice_issues, notice_preamble = _files_check_core_logic_notice(archive, member)
+                    notice_results[filename] = (notice_ok, notice_issues, notice_preamble)
+ elif filename in {"DISCLAIMER", "DISCLAIMER-WIP"}:
+ disclaimer_found = True
+ except tarzip.ArchiveMemberLimitExceededError as e:
+ yield ArtifactResult(
+ status=sql.CheckResultStatus.FAILURE,
+ message=f"Archive has too many members: {e}",
+ data={"error": str(e)},
+ )
+ return
yield from _license_results(license_results)
yield from _notice_results(notice_results)
@@ -320,7 +328,7 @@ def _get_file_extension(filename: str) -> str | None:
return ext[1:].lower()
-def _headers_check_core_logic(artifact_path: str, ignore_lines: list[str], excludes_source: str) -> Iterator[Result]:
+def _headers_check_core_logic(artifact_path: str, ignore_lines: list[str], excludes_source: str) -> Iterator[Result]:  # noqa: C901
"""Verify Apache License headers in source files within an archive."""
# We could modify @Lucas-C/pre-commit-hooks instead for this
# But hopefully this will be robust enough, at least for testing
@@ -341,34 +349,42 @@ def _headers_check_core_logic(artifact_path: str, ignore_lines: list[str], exclu
# log.info(f"Ignore lines: {ignore_lines}")
# Check files in the archive
- with tarzip.open_archive(artifact_path) as archive:
- for member in archive:
- if member.name and member.name.split("/")[-1].startswith("._"):
- # Metadata convention
- continue
-
-            ignore_path = "/" + artifact_basename + "/" + member.name.lstrip("/")
-            matcher = util.create_path_matcher(ignore_lines, pathlib.Path(ignore_path), pathlib.Path("/"))
- # log.info(f"Checking {ignore_path} with matcher {matcher}")
- if matcher(ignore_path):
-                # log.info(f"Skipping {ignore_path} because it matches the ignore list")
- continue
-
- match _headers_check_core_logic_process_file(archive, member):
- case ArtifactResult() | MemberResult() as result:
- artifact_data.files_checked += 1
- match result.status:
- case sql.CheckResultStatus.SUCCESS:
- artifact_data.files_with_valid_headers += 1
- case sql.CheckResultStatus.FAILURE:
- artifact_data.files_with_invalid_headers += 1
- case sql.CheckResultStatus.WARNING:
- artifact_data.files_with_invalid_headers += 1
- case sql.CheckResultStatus.EXCEPTION:
- artifact_data.files_with_invalid_headers += 1
- yield result
- case MemberSkippedResult():
- artifact_data.files_skipped += 1
+ try:
+ with tarzip.open_archive(artifact_path) as archive:
+ for member in archive:
+ if member.name and member.name.split("/")[-1].startswith("._"):
+ # Metadata convention
+ continue
+
+                ignore_path = "/" + artifact_basename + "/" + member.name.lstrip("/")
+                matcher = util.create_path_matcher(ignore_lines, pathlib.Path(ignore_path), pathlib.Path("/"))
+ # log.info(f"Checking {ignore_path} with matcher {matcher}")
+ if matcher(ignore_path):
+                    # log.info(f"Skipping {ignore_path} because it matches the ignore list")
+ continue
+
+ match _headers_check_core_logic_process_file(archive, member):
+ case ArtifactResult() | MemberResult() as result:
+ artifact_data.files_checked += 1
+ match result.status:
+ case sql.CheckResultStatus.SUCCESS:
+ artifact_data.files_with_valid_headers += 1
+ case sql.CheckResultStatus.FAILURE:
+ artifact_data.files_with_invalid_headers += 1
+ case sql.CheckResultStatus.WARNING:
+ artifact_data.files_with_invalid_headers += 1
+ case sql.CheckResultStatus.EXCEPTION:
+ artifact_data.files_with_invalid_headers += 1
+ yield result
+ case MemberSkippedResult():
+ artifact_data.files_skipped += 1
+ except tarzip.ArchiveMemberLimitExceededError as e:
+ yield ArtifactResult(
+ status=sql.CheckResultStatus.FAILURE,
+ message=f"Archive has too many members: {e}",
+ data={"error": str(e)},
+ )
+ return
yield ArtifactResult(
status=sql.CheckResultStatus.SUCCESS,
diff --git a/atr/tasks/checks/targz.py b/atr/tasks/checks/targz.py
index 18ae3f4..fd90463 100644
--- a/atr/tasks/checks/targz.py
+++ b/atr/tasks/checks/targz.py
@@ -43,6 +43,8 @@ async def integrity(args: checks.FunctionArguments) -> results.Results | None:
try:
         size = await asyncio.to_thread(archives.total_size, str(artifact_abs_path), chunk_size)
         await recorder.success("Able to read all entries of the archive using tarfile", {"size": size})
+ except tarzip.ArchiveMemberLimitExceededError as e:
+        await recorder.failure(f"Archive has too many members: {e}", {"error": str(e)})
except Exception as e:
         await recorder.failure("Unable to read all entries of the archive using tarfile", {"error": str(e)})
return None
@@ -99,6 +101,8 @@ async def structure(args: checks.FunctionArguments) -> results.Results | None:
                 f"Root directory '{root}' does not match expected name '{expected_root}'",
                 {"root": root, "expected": expected_root},
)
+ except tarzip.ArchiveMemberLimitExceededError as e:
+        await recorder.failure(f"Archive has too many members: {e}", {"error": str(e)})
except RootDirectoryError as e:
         await recorder.warning("Could not get the root directory of the archive", {"error": str(e)})
except Exception as e:
diff --git a/tests/unit/test_archive_member_limit.py b/tests/unit/test_archive_member_limit.py
index e805d68..d085b1d 100644
--- a/tests/unit/test_archive_member_limit.py
+++ b/tests/unit/test_archive_member_limit.py
@@ -15,17 +15,67 @@
# specific language governing permissions and limitations
# under the License.
+import datetime
import io
+import pathlib
import tarfile
import zipfile
import pytest
import atr.archives as archives
+import atr.models.sql as sql
import atr.tarzip as tarzip
+import atr.tasks.checks as checks
+import atr.tasks.checks.license as license_checks
+import atr.tasks.checks.targz as targz
import atr.tasks.checks.zipformat as zipformat
+class _RecorderStub(checks.Recorder):
+ def __init__(self, path: pathlib.Path):
+ super().__init__(
+ checker="tests.unit.test_archive_member_limit",
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ member_rel_path=None,
+ afresh=False,
+ )
+ self._path = path
+ self.messages: list[tuple[str, str, dict | None]] = []
+
+    async def abs_path(self, rel_path: str | None = None) -> pathlib.Path | None:
+ return self._path if (rel_path is None) else self._path / rel_path
+
+ async def primary_path_is_binary(self) -> bool:
+ return False
+
+ async def _add(
+ self,
+ status: sql.CheckResultStatus,
+ message: str,
+ data: object,
+ primary_rel_path: str | None = None,
+ member_rel_path: str | None = None,
+ ) -> sql.CheckResult:
+        self.messages.append((status.value, message, data if isinstance(data, dict) else None))
+ return sql.CheckResult(
+ id=0,
+ release_name=self.release_name,
+ revision_number=self.revision_number,
+ checker=self.checker,
+ primary_rel_path=primary_rel_path,
+ member_rel_path=member_rel_path,
+ created=datetime.datetime.now(datetime.UTC),
+ status=status,
+ message=message,
+ data=data,
+ input_hash=None,
+ )
+
+
def test_extract_wraps_member_limit(tmp_path, monkeypatch):
archive_path = tmp_path / "sample.tar"
_make_tar(archive_path, ["a.txt", "b.txt", "c.txt"])
@@ -45,6 +95,62 @@ def test_extract_wraps_member_limit(tmp_path, monkeypatch):
assert "too many members" in str(excinfo.value).lower()
+def test_license_files_reports_member_limit(tmp_path, monkeypatch):
+ archive_path = tmp_path / "sample.tar"
+ _make_tar(archive_path, ["LICENSE", "NOTICE", "README.txt"])
+
+ original_open = tarzip.open_archive
+
+ def limited_open(path: str, *args, **kwargs):
+ return original_open(path, max_members=2)
+
+ monkeypatch.setattr(tarzip, "open_archive", limited_open)
+
+    results = list(license_checks._files_check_core_logic(str(archive_path), is_podling=False))
+ assert any(
+        isinstance(result, license_checks.ArtifactResult) and ("too many members" in result.message.lower())
+ for result in results
+ )
+
+
+def test_license_headers_reports_member_limit(tmp_path, monkeypatch):
+ archive_path = tmp_path / "sample.tar"
+ _make_tar(archive_path, ["main.py", "README.txt", "extra.txt"])
+
+ original_open = tarzip.open_archive
+
+ def limited_open(path: str, *args, **kwargs):
+ return original_open(path, max_members=2)
+
+ monkeypatch.setattr(tarzip, "open_archive", limited_open)
+
+    results = list(license_checks._headers_check_core_logic(str(archive_path), [], "none"))
+ assert any(
+        isinstance(result, license_checks.ArtifactResult) and ("too many members" in result.message.lower())
+ for result in results
+ )
+
+
+def test_open_archive_disables_member_limit_for_negative(tmp_path):
+ archive_path = tmp_path / "sample.zip"
+ _make_zip(archive_path, ["a.txt", "b.txt", "c.txt"])
+
+ with tarzip.open_archive(str(archive_path), max_members=-1) as archive:
+ members = list(archive)
+
+ assert len(members) == 3
+
+
+def test_open_archive_disables_member_limit_for_zero(tmp_path):
+ archive_path = tmp_path / "sample.tar"
+ _make_tar(archive_path, ["a.txt", "b.txt", "c.txt"])
+
+ with tarzip.open_archive(str(archive_path), max_members=0) as archive:
+ members = list(archive)
+
+ assert len(members) == 3
+
+
def test_open_archive_enforces_member_limit_tar(tmp_path):
archive_path = tmp_path / "sample.tar"
_make_tar(archive_path, ["a.txt", "b.txt", "c.txt"])
@@ -63,6 +169,45 @@ def test_open_archive_enforces_member_limit_zip(tmp_path):
list(archive)
[email protected]
+async def test_targz_integrity_reports_member_limit(tmp_path, monkeypatch):
+ archive_path = tmp_path / "sample.tar"
+ _make_tar(archive_path, ["a.txt", "b.txt", "c.txt"])
+ recorder = _RecorderStub(archive_path)
+
+ original_open = tarzip.open_archive
+
+ def limited_open(path: str, *args, **kwargs):
+ return original_open(path, max_members=2)
+
+ monkeypatch.setattr(tarzip, "open_archive", limited_open)
+
+ args = await _args_for(recorder)
+ await targz.integrity(args)
+
+    assert any("too many members" in message.lower() for _, message, _ in recorder.messages)
+
+
[email protected]
+async def test_targz_structure_reports_member_limit(tmp_path, monkeypatch):
+ archive_path = tmp_path / "sample.tar"
+ # Must include the root directory here
+ _make_tar(archive_path, ["sample/a.txt", "sample/b.txt", "sample/c.txt"])
+ recorder = _RecorderStub(archive_path)
+
+ original_open = tarzip.open_archive
+
+ def limited_open(path: str, *args, **kwargs):
+ return original_open(path, max_members=2)
+
+ monkeypatch.setattr(tarzip, "open_archive", limited_open)
+
+ args = await _args_for(recorder)
+ await targz.structure(args)
+
+    assert any("too many members" in message.lower() for _, message, _ in recorder.messages)
+
+
def test_zipformat_integrity_reports_member_limit(tmp_path, monkeypatch):
archive_path = tmp_path / "sample.zip"
_make_zip(archive_path, ["a.txt", "b.txt", "c.txt"])
@@ -93,6 +238,21 @@ def test_zipformat_structure_reports_member_limit(tmp_path, monkeypatch):
assert "too many members" in result.get("error", "").lower()
+async def _args_for(recorder: _RecorderStub) -> checks.FunctionArguments:
+ async def _recorder() -> checks.Recorder:
+ return recorder
+
+ return checks.FunctionArguments(
+ recorder=_recorder,
+ asf_uid="",
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ extra_args={},
+ )
+
+
def _make_tar(path, members: list[str]) -> None:
with tarfile.open(path, "w") as tf:
for name in members:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]