This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/sbp by this push:
new 24d8208 Ignore certain suffixes on archive basenames when searching for a root
24d8208 is described below
commit 24d820847a08964d73adcfb72f55d2783a60ae18
Author: Sean B. Palmer <[email protected]>
AuthorDate: Thu Jan 29 19:31:06 2026 +0000
Ignore certain suffixes on archive basenames when searching for a root
---
atr/tasks/checks/targz.py | 18 +--
atr/tasks/checks/zipformat.py | 30 +++--
atr/util.py | 12 +-
tests/unit/recorders.py | 74 +++++++++++++
tests/unit/test_archive_member_limit.py | 59 +---------
tests/unit/test_archive_root_variants.py | 185 +++++++++++++++++++++++++++++++
6 files changed, 300 insertions(+), 78 deletions(-)
diff --git a/atr/tasks/checks/targz.py b/atr/tasks/checks/targz.py
index fd90463..4eae0fb 100644
--- a/atr/tasks/checks/targz.py
+++ b/atr/tasks/checks/targz.py
@@ -23,6 +23,7 @@ import atr.log as log
import atr.models.results as results
import atr.tarzip as tarzip
import atr.tasks.checks as checks
+import atr.util as util
class RootDirectoryError(Exception):
@@ -82,24 +83,27 @@ async def structure(args: checks.FunctionArguments) ->
results.Results | None:
return None
filename = artifact_abs_path.name
- expected_root: Final[str] = (
+ basename_from_filename: Final[str] = (
filename.removesuffix(".tar.gz") if filename.endswith(".tar.gz") else
filename.removesuffix(".tgz")
)
+ expected_roots: Final[list[str]] =
util.permitted_archive_roots(basename_from_filename)
+ expected_roots_display = ", ".join(expected_roots)
log.info(
- f"Checking structure for {artifact_abs_path} (expected root:
{expected_root}) (rel: {args.primary_rel_path})"
+ "Checking structure for "
+ f"{artifact_abs_path} (expected roots: {expected_roots_display}) (rel:
{args.primary_rel_path})"
)
try:
root = await asyncio.to_thread(root_directory, str(artifact_abs_path))
- if root == expected_root:
+ if root in expected_roots:
await recorder.success(
- "Archive contains exactly one root directory matching the
expected name",
- {"root": root, "expected": expected_root},
+ "Archive contains exactly one root directory matching an
expected name",
+ {"root": root, "basename_from_filename":
basename_from_filename, "expected_roots": expected_roots},
)
else:
await recorder.warning(
- f"Root directory '{root}' does not match expected name
'{expected_root}'",
- {"root": root, "expected": expected_root},
+ f"Root directory '{root}' does not match expected names
'{expected_roots_display}'",
+ {"root": root, "basename_from_filename":
basename_from_filename, "expected_roots": expected_roots},
)
except tarzip.ArchiveMemberLimitExceededError as e:
await recorder.failure(f"Archive has too many members: {e}", {"error":
str(e)})
diff --git a/atr/tasks/checks/zipformat.py b/atr/tasks/checks/zipformat.py
index d69138f..504de2f 100644
--- a/atr/tasks/checks/zipformat.py
+++ b/atr/tasks/checks/zipformat.py
@@ -101,28 +101,24 @@ def _structure_check_core_logic(artifact_path: str) ->
dict[str, Any]:
return {"error": "Archive is empty"}
base_name = os.path.basename(artifact_path)
- name_part = base_name.removesuffix(".zip")
- # # TODO: Airavata has e.g. "-source-release"
- # # It would be useful if there were a function in analysis.py for
stripping these
- # # But the root directory should probably always match the name
of the file sans suffix
- # # (This would also be easier to implement)
- # if name_part.endswith(("-src", "-bin", "-dist")):
- # name_part = "-".join(name_part.split("-")[:-1])
- expected_root = name_part
+ basename_from_filename = base_name.removesuffix(".zip")
+ expected_roots =
util.permitted_archive_roots(basename_from_filename)
root_dirs, non_rooted_files =
_structure_check_core_logic_find_roots(members)
member_names = [m.name for m in members]
actual_root, error_msg = _structure_check_core_logic_validate_root(
- member_names, root_dirs, non_rooted_files, expected_root
+ member_names, root_dirs, non_rooted_files, expected_roots
)
if error_msg:
+ result_data: dict[str, Any] = {"expected_roots":
expected_roots}
if error_msg.startswith("Root directory mismatch"):
- return {"warning": error_msg}
+ result_data["warning"] = error_msg
else:
- return {"error": error_msg}
+ result_data["error"] = error_msg
+ return result_data
if actual_root:
- return {"root_dir": actual_root}
+ return {"root_dir": actual_root, "expected_roots":
expected_roots}
return {"error": "Unknown structure validation error"}
except tarzip.ArchiveMemberLimitExceededError as e:
@@ -148,7 +144,7 @@ def _structure_check_core_logic_find_roots(members:
list[tarzip.Member]) -> tupl
def _structure_check_core_logic_validate_root(
- members: list[str], root_dirs: set[str], non_rooted_files: list[str],
expected_root: str
+ members: list[str], root_dirs: set[str], non_rooted_files: list[str],
expected_roots: list[str]
) -> tuple[str | None, str | None]:
"""Validate the identified root structure against expectations."""
if non_rooted_files:
@@ -159,14 +155,16 @@ def _structure_check_core_logic_validate_root(
return None, f"Multiple root directories found:
{sorted(list(root_dirs))}"
actual_root = next(iter(root_dirs))
- if actual_root != expected_root:
- return None, f"Root directory mismatch. Expected '{expected_root}',
found '{actual_root}'"
+ if actual_root not in expected_roots:
+ expected_roots_display = "', '".join(expected_roots)
+ return None, f"Root directory mismatch. Expected one of
'{expected_roots_display}', found '{actual_root}'"
# Check whether all members are under the correct root directory
+ expected_prefix = f"{actual_root.rstrip('/')}/"
for member in members:
if member == actual_root.rstrip("/"):
continue
- if not member.startswith(expected_root):
+ if not member.startswith(expected_prefix):
return None, f"Member found outside expected root directory:
{member}"
return actual_root, None
diff --git a/atr/util.py b/atr/util.py
index 64a2318..4b60e1a 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -57,7 +57,7 @@ import atr.user as user
T = TypeVar("T")
-USER_TESTS_ADDRESS: Final[str] = "[email protected]"
+ARCHIVE_ROOT_SUFFIXES: Final[tuple[str, ...]] = ("-source", "-src")
DEV_TEST_MID: Final[str] =
"CAH5JyZo8QnWmg9CwRSwWY=givhxw4nilyenjo71fkdk81j5...@mail.gmail.com"
DEV_THREAD_URLS: Final[dict[str, str]] = {
"CAH5JyZo8QnWmg9CwRSwWY=givhxw4nilyenjo71fkdk81j5...@mail.gmail.com":
"https://lists.apache.org/thread/z0o7xnjnyw2o886rxvvq2ql4rdfn754w",
@@ -70,6 +70,7 @@ DEV_THREAD_URLS: Final[dict[str, str]] = {
"CADL1oArKFcXvNb1MJfjN=10-yrfkxgpltrurdmm1r7ygatk...@mail.gmail.com":
"https://lists.apache.org/thread/d7119h2qm7jrd5zsbp8ghkk0lpvnnxnw",
"[email protected]":
"https://lists.apache.org/thread/gzjd2jv7yod5sk5rgdf4x33g5l3fdf5o",
}
+USER_TESTS_ADDRESS: Final[str] = "[email protected]"
class SshFingerprintError(ValueError):
@@ -744,6 +745,15 @@ def permitted_announce_recipients(asf_uid: str) ->
list[str]:
]
+def permitted_archive_roots(basename_from_filename: str) -> list[str]:
+ # TODO: Airavata uses "-source-release"
+ for suffix in ARCHIVE_ROOT_SUFFIXES:
+ if basename_from_filename.endswith(suffix):
+ expected_root_base = basename_from_filename.removesuffix(suffix)
+ return [expected_root_base, f"{expected_root_base}{suffix}"]
+ return [basename_from_filename]
+
+
def permitted_voting_recipients(asf_uid: str, committee_name: str) ->
list[str]:
return [
f"dev@{committee_name}.apache.org",
diff --git a/tests/unit/recorders.py b/tests/unit/recorders.py
new file mode 100644
index 0000000..33e5af0
--- /dev/null
+++ b/tests/unit/recorders.py
@@ -0,0 +1,74 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime
+import pathlib
+from collections.abc import Awaitable, Callable
+
+import atr.models.sql as sql
+import atr.tasks.checks as checks
+
+
+class RecorderStub(checks.Recorder):
+ def __init__(self, path: pathlib.Path, checker: str) -> None:
+ super().__init__(
+ checker=checker,
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ member_rel_path=None,
+ afresh=False,
+ )
+ self._path = path
+ self.messages: list[tuple[str, str, dict | None]] = []
+
+ async def abs_path(self, rel_path: str | None = None) -> pathlib.Path |
None:
+ return self._path if (rel_path is None) else self._path / rel_path
+
+ async def primary_path_is_binary(self) -> bool:
+ return False
+
+ async def _add(
+ self,
+ status: sql.CheckResultStatus,
+ message: str,
+ data: object,
+ primary_rel_path: str | None = None,
+ member_rel_path: str | None = None,
+ ) -> sql.CheckResult:
+ self.messages.append((status.value, message, data if isinstance(data,
dict) else None))
+ return sql.CheckResult(
+ id=0,
+ release_name=self.release_name,
+ revision_number=self.revision_number,
+ checker=self.checker,
+ primary_rel_path=primary_rel_path,
+ member_rel_path=member_rel_path,
+ created=datetime.datetime.now(datetime.UTC),
+ status=status,
+ message=message,
+ data=data,
+ input_hash=None,
+ )
+
+
+def get_recorder(recorder: checks.Recorder) -> Callable[[],
Awaitable[checks.Recorder]]:
+ async def _recorder() -> checks.Recorder:
+ return recorder
+
+ return _recorder
diff --git a/tests/unit/test_archive_member_limit.py
b/tests/unit/test_archive_member_limit.py
index d085b1d..c3225fb 100644
--- a/tests/unit/test_archive_member_limit.py
+++ b/tests/unit/test_archive_member_limit.py
@@ -15,65 +15,19 @@
# specific language governing permissions and limitations
# under the License.
-import datetime
import io
-import pathlib
import tarfile
import zipfile
import pytest
import atr.archives as archives
-import atr.models.sql as sql
import atr.tarzip as tarzip
import atr.tasks.checks as checks
import atr.tasks.checks.license as license_checks
import atr.tasks.checks.targz as targz
import atr.tasks.checks.zipformat as zipformat
-
-
-class _RecorderStub(checks.Recorder):
- def __init__(self, path: pathlib.Path):
- super().__init__(
- checker="tests.unit.test_archive_member_limit",
- project_name="test",
- version_name="test",
- revision_number="00001",
- primary_rel_path=None,
- member_rel_path=None,
- afresh=False,
- )
- self._path = path
- self.messages: list[tuple[str, str, dict | None]] = []
-
- async def abs_path(self, rel_path: str | None = None) -> pathlib.Path |
None:
- return self._path if (rel_path is None) else self._path / rel_path
-
- async def primary_path_is_binary(self) -> bool:
- return False
-
- async def _add(
- self,
- status: sql.CheckResultStatus,
- message: str,
- data: object,
- primary_rel_path: str | None = None,
- member_rel_path: str | None = None,
- ) -> sql.CheckResult:
- self.messages.append((status.value, message, data if isinstance(data,
dict) else None))
- return sql.CheckResult(
- id=0,
- release_name=self.release_name,
- revision_number=self.revision_number,
- checker=self.checker,
- primary_rel_path=primary_rel_path,
- member_rel_path=member_rel_path,
- created=datetime.datetime.now(datetime.UTC),
- status=status,
- message=message,
- data=data,
- input_hash=None,
- )
+import tests.unit.recorders as recorders
def test_extract_wraps_member_limit(tmp_path, monkeypatch):
@@ -173,7 +127,7 @@ def test_open_archive_enforces_member_limit_zip(tmp_path):
async def test_targz_integrity_reports_member_limit(tmp_path, monkeypatch):
archive_path = tmp_path / "sample.tar"
_make_tar(archive_path, ["a.txt", "b.txt", "c.txt"])
- recorder = _RecorderStub(archive_path)
+ recorder = recorders.RecorderStub(archive_path,
"tests.unit.test_archive_member_limit")
original_open = tarzip.open_archive
@@ -193,7 +147,7 @@ async def
test_targz_structure_reports_member_limit(tmp_path, monkeypatch):
archive_path = tmp_path / "sample.tar"
# Must include the root directory here
_make_tar(archive_path, ["sample/a.txt", "sample/b.txt", "sample/c.txt"])
- recorder = _RecorderStub(archive_path)
+ recorder = recorders.RecorderStub(archive_path,
"tests.unit.test_archive_member_limit")
original_open = tarzip.open_archive
@@ -238,12 +192,9 @@ def
test_zipformat_structure_reports_member_limit(tmp_path, monkeypatch):
assert "too many members" in result.get("error", "").lower()
-async def _args_for(recorder: _RecorderStub) -> checks.FunctionArguments:
- async def _recorder() -> checks.Recorder:
- return recorder
-
+async def _args_for(recorder: recorders.RecorderStub) ->
checks.FunctionArguments:
return checks.FunctionArguments(
- recorder=_recorder,
+ recorder=recorders.get_recorder(recorder),
asf_uid="",
project_name="test",
version_name="test",
diff --git a/tests/unit/test_archive_root_variants.py
b/tests/unit/test_archive_root_variants.py
new file mode 100644
index 0000000..5a7f496
--- /dev/null
+++ b/tests/unit/test_archive_root_variants.py
@@ -0,0 +1,185 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import io
+import pathlib
+import tarfile
+import zipfile
+
+import pytest
+
+import atr.models.sql as sql
+import atr.tasks.checks as checks
+import atr.tasks.checks.targz as targz
+import atr.tasks.checks.zipformat as zipformat
+import tests.unit.recorders as recorders
+
+
+@pytest.mark.asyncio
+async def test_targz_structure_accepts_source_suffix_variant(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "apache-example-1.2.3-source.tar.gz"
+ _make_tar_gz(archive_path, ["apache-example-1.2.3/README.txt"])
+ recorder = recorders.RecorderStub(archive_path,
"tests.unit.test_archive_root_variants")
+ args = checks.FunctionArguments(
+ recorder=recorders.get_recorder(recorder),
+ asf_uid="",
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ extra_args={},
+ )
+
+ await targz.structure(args)
+
+ assert any(status == sql.CheckResultStatus.SUCCESS.value for status, _, _
in recorder.messages)
+
+
+@pytest.mark.asyncio
+async def test_targz_structure_accepts_src_suffix_variant(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "apache-example-1.2.3-src.tar.gz"
+ _make_tar_gz(archive_path, ["apache-example-1.2.3/README.txt"])
+ recorder = recorders.RecorderStub(archive_path,
"tests.unit.test_archive_root_variants")
+ args = checks.FunctionArguments(
+ recorder=recorders.get_recorder(recorder),
+ asf_uid="",
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ extra_args={},
+ )
+
+ await targz.structure(args)
+
+ assert any(status == sql.CheckResultStatus.SUCCESS.value for status, _, _
in recorder.messages)
+
+
+@pytest.mark.asyncio
+async def
test_targz_structure_rejects_source_root_when_filename_has_no_suffix(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "apache-example-1.2.3.tar.gz"
+ _make_tar_gz(archive_path, ["apache-example-1.2.3-source/README.txt"])
+ recorder = recorders.RecorderStub(archive_path,
"tests.unit.test_archive_root_variants")
+ args = checks.FunctionArguments(
+ recorder=recorders.get_recorder(recorder),
+ asf_uid="",
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ extra_args={},
+ )
+
+ await targz.structure(args)
+
+ assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _
in recorder.messages)
+
+
+@pytest.mark.asyncio
+async def
test_targz_structure_rejects_source_root_when_filename_has_src_suffix(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "apache-example-1.2.3-src.tar.gz"
+ _make_tar_gz(archive_path, ["apache-example-1.2.3-source/README.txt"])
+ recorder = recorders.RecorderStub(archive_path,
"tests.unit.test_archive_root_variants")
+ args = checks.FunctionArguments(
+ recorder=recorders.get_recorder(recorder),
+ asf_uid="",
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ extra_args={},
+ )
+
+ await targz.structure(args)
+
+ assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _
in recorder.messages)
+
+
+@pytest.mark.asyncio
+async def
test_targz_structure_rejects_src_root_when_filename_has_no_suffix(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "apache-example-1.2.3.tar.gz"
+ _make_tar_gz(archive_path, ["apache-example-1.2.3-src/README.txt"])
+ recorder = recorders.RecorderStub(archive_path,
"tests.unit.test_archive_root_variants")
+ args = checks.FunctionArguments(
+ recorder=recorders.get_recorder(recorder),
+ asf_uid="",
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ extra_args={},
+ )
+
+ await targz.structure(args)
+
+ assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _
in recorder.messages)
+
+
+@pytest.mark.asyncio
+async def
test_targz_structure_rejects_src_root_when_filename_has_source_suffix(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "apache-example-1.2.3-source.tar.gz"
+ _make_tar_gz(archive_path, ["apache-example-1.2.3-src/README.txt"])
+ recorder = recorders.RecorderStub(archive_path,
"tests.unit.test_archive_root_variants")
+ args = checks.FunctionArguments(
+ recorder=recorders.get_recorder(recorder),
+ asf_uid="",
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ extra_args={},
+ )
+
+ await targz.structure(args)
+
+ assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _
in recorder.messages)
+
+
+def test_zipformat_structure_accepts_src_suffix_variant(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "apache-example-1.2.3-src.zip"
+ _make_zip(archive_path, ["apache-example-1.2.3/README.txt"])
+
+ result = zipformat._structure_check_core_logic(str(archive_path))
+
+ assert result.get("error") is None
+ assert result.get("warning") is None
+ assert result.get("root_dir") == "apache-example-1.2.3"
+
+
+def test_zipformat_structure_rejects_dated_src_suffix(tmp_path: pathlib.Path)
-> None:
+ archive_path = tmp_path / "apache-example-1.2.3-src-20251202.zip"
+ _make_zip(archive_path, ["apache-example-1.2.3/README.txt"])
+
+ result = zipformat._structure_check_core_logic(str(archive_path))
+
+ assert "warning" in result
+ assert "Root directory mismatch" in result["warning"]
+
+
+def _make_tar_gz(path: pathlib.Path, members: list[str]) -> None:
+ with tarfile.open(path, "w:gz") as tf:
+ for name in members:
+ data = f"data-{name}".encode()
+ info = tarfile.TarInfo(name=name)
+ info.size = len(data)
+ tf.addfile(info, io.BytesIO(data))
+
+
+def _make_zip(path: pathlib.Path, members: list[str]) -> None:
+ with zipfile.ZipFile(path, "w") as zf:
+ for name in members:
+ zf.writestr(name, f"data-{name}")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]