This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/sbp by this push:
new 7233089 Detect and allow package roots from npm pack output
7233089 is described below
commit 72330898d2cf1b173df94b6e9f466bf812e2f77e
Author: Sean B. Palmer <[email protected]>
AuthorDate: Fri Jan 30 17:18:39 2026 +0000
Detect and allow package roots from npm pack output
---
atr/tasks/checks/targz.py | 63 +++++++++++---
atr/tasks/checks/zipformat.py | 42 ++++++++-
atr/util.py | 59 +++++++++++++
tests/unit/test_archive_root_variants.py | 144 +++++++++++++++++++++++++++++++
4 files changed, 292 insertions(+), 16 deletions(-)
diff --git a/atr/tasks/checks/targz.py b/atr/tasks/checks/targz.py
index 4eae0fb..f3583b1 100644
--- a/atr/tasks/checks/targz.py
+++ b/atr/tasks/checks/targz.py
@@ -51,9 +51,10 @@ async def integrity(args: checks.FunctionArguments) -> results.Results | None:
return None
-def root_directory(tgz_path: str) -> str:
- """Find the root directory in a tar archive and validate that it has only one root dir."""
+def root_directory(tgz_path: str) -> tuple[str, bytes | None]: # noqa: C901
+ """Find root directory and extract package/package.json if found."""
root = None
+ package_json: bytes | None = None
with tarzip.open_archive(tgz_path) as archive:
for member in archive:
@@ -63,18 +64,30 @@ def root_directory(tgz_path: str) -> str:
parts = member.name.split("/", 1)
if len(parts) >= 1:
- if not root:
+ if root is None:
root = parts[0]
elif parts[0] != root:
raise RootDirectoryError(f"Multiple root directories found: {root}, {parts[0]}")
+ if (root == "package") and (package_json is None):
+ member_name = member.name.lstrip("./")
+ if (member_name == "package/package.json") and member.isfile():
+ size = member.size if hasattr(member, "size") else 0
+ if (size > 0) and (size <= util.NPM_PACKAGE_JSON_MAX_SIZE):
+ f = archive.extractfile(member)
+ if f is not None:
+ try:
+ package_json = f.read()
+ finally:
+ f.close()
+
if not root:
raise RootDirectoryError("No root directory found in archive")
- return root
+ return root, package_json
-async def structure(args: checks.FunctionArguments) -> results.Results | None:
+async def structure(args: checks.FunctionArguments) -> results.Results | None: # noqa: C901
"""Check the structure of a .tar.gz file."""
recorder = await args.recorder()
if not (artifact_abs_path := await recorder.abs_path()):
@@ -94,16 +107,42 @@ async def structure(args: checks.FunctionArguments) -> results.Results | None:
)
try:
- root = await asyncio.to_thread(root_directory, str(artifact_abs_path))
+ root, package_json = await asyncio.to_thread(root_directory, str(artifact_abs_path))
+ data: dict[str, object] = {
+ "root": root,
+ "basename_from_filename": basename_from_filename,
+ "expected_roots": expected_roots,
+ }
if root in expected_roots:
- await recorder.success(
- "Archive contains exactly one root directory matching an expected name",
- {"root": root, "basename_from_filename": basename_from_filename, "expected_roots": expected_roots},
- )
+ await recorder.success("Archive contains exactly one root directory matching an expected name", data)
+ elif root == "package":
+ if package_json is not None:
+ npm_info, npm_error = util.parse_npm_pack_info(package_json, basename_from_filename)
+ if npm_info is not None:
+ data["npm_pack"] = {
+ "name": npm_info.name,
+ "version": npm_info.version,
+ "filename_match": npm_info.filename_match,
+ }
+ if npm_info.filename_match is False:
+ await recorder.warning(
+ "npm pack layout detected but filename does not match package.json", data
+ )
+ else:
+ await recorder.success("npm pack layout detected, allowing package/ root", data)
+ else:
+ if npm_error is not None:
+ data["npm_pack_error"] = npm_error
+ await recorder.warning(
+ f"Root directory '{root}' does not match expected names '{expected_roots_display}'", data
+ )
+ else:
+ await recorder.warning(
+ f"Root directory '{root}' does not match expected names '{expected_roots_display}'", data
+ )
else:
await recorder.warning(
- f"Root directory '{root}' does not match expected names '{expected_roots_display}'",
- {"root": root, "basename_from_filename": basename_from_filename, "expected_roots": expected_roots},
+ f"Root directory '{root}' does not match expected names '{expected_roots_display}'", data
)
except tarzip.ArchiveMemberLimitExceededError as e:
await recorder.failure(f"Archive has too many members: {e}", {"error": str(e)})
diff --git a/atr/tasks/checks/zipformat.py b/atr/tasks/checks/zipformat.py
index 504de2f..6cfb0b5 100644
--- a/atr/tasks/checks/zipformat.py
+++ b/atr/tasks/checks/zipformat.py
@@ -92,11 +92,27 @@ def _integrity_check_core_logic(artifact_path: str) -> dict[str, Any]:
return {"error": f"Unexpected error: {e}"}
-def _structure_check_core_logic(artifact_path: str) -> dict[str, Any]:
+def _structure_check_core_logic(artifact_path: str) -> dict[str, Any]: # noqa: C901
"""Verify the internal structure of the zip archive."""
try:
with tarzip.open_archive(artifact_path) as archive:
- members: list[tarzip.Member] = list(archive)
+ members: list[tarzip.Member] = []
+ package_json: bytes | None = None
+
+ for member in archive:
+ members.append(member)
+ if package_json is None:
+ member_name = member.name.lstrip("./")
+ if (member_name == "package/package.json") and member.isfile():
+ size = member.size if hasattr(member, "size") else 0
+ if (size > 0) and (size <= util.NPM_PACKAGE_JSON_MAX_SIZE):
+ f = archive.extractfile(member)
+ if f is not None:
+ try:
+ package_json = f.read()
+ finally:
+ f.close()
+
if not members:
return {"error": "Archive is empty"}
@@ -110,7 +126,22 @@ def _structure_check_core_logic(artifact_path: str) -> dict[str, Any]:
member_names, root_dirs, non_rooted_files, expected_roots
)
- if error_msg:
+ if error_msg is not None:
+ if (actual_root == "package") and (package_json is not None):
+ npm_info, _ = util.parse_npm_pack_info(package_json, basename_from_filename)
+ if npm_info is not None:
+ npm_data: dict[str, Any] = {
+ "root_dir": actual_root,
+ "expected_roots": expected_roots,
+ "npm_pack": {
+ "name": npm_info.name,
+ "version": npm_info.version,
+ "filename_match": npm_info.filename_match,
+ },
+ }
+ if npm_info.filename_match is False:
+ npm_data["warning"] = "npm pack layout detected but filename does not match package.json"
+ return npm_data
result_data: dict[str, Any] = {"expected_roots": expected_roots}
if error_msg.startswith("Root directory mismatch"):
result_data["warning"] = error_msg
@@ -157,7 +188,10 @@ def _structure_check_core_logic_validate_root(
actual_root = next(iter(root_dirs))
if actual_root not in expected_roots:
expected_roots_display = "', '".join(expected_roots)
- return None, f"Root directory mismatch. Expected one of '{expected_roots_display}', found '{actual_root}'"
+ return (
+ actual_root,
+ f"Root directory mismatch. Expected one of '{expected_roots_display}', found '{actual_root}'",
+ )
# Check whether all members are under the correct root directory
expected_prefix = f"{actual_root.rstrip('/')}/"
diff --git a/atr/util.py b/atr/util.py
index 4b60e1a..b2d67f1 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -70,9 +70,17 @@ DEV_THREAD_URLS: Final[dict[str, str]] = {
"CADL1oArKFcXvNb1MJfjN=10-yrfkxgpltrurdmm1r7ygatk...@mail.gmail.com":
"https://lists.apache.org/thread/d7119h2qm7jrd5zsbp8ghkk0lpvnnxnw",
"[email protected]":
"https://lists.apache.org/thread/gzjd2jv7yod5sk5rgdf4x33g5l3fdf5o",
}
+NPM_PACKAGE_JSON_MAX_SIZE: Final[int] = 512 * 1024
USER_TESTS_ADDRESS: Final[str] = "[email protected]"
+@dataclasses.dataclass(frozen=True)
+class NpmPackInfo:
+ name: str
+ version: str
+ filename_match: bool | None
+
+
class SshFingerprintError(ValueError):
pass
@@ -701,6 +709,20 @@ def parse_key_blocks_bytes(keys_data: bytes) -> list[str]:
return key_blocks
+def parse_npm_pack_info(raw: bytes, filename_basename: str | None = None) -> tuple[NpmPackInfo | None, str | None]:
+ """Parse npm pack info from package.json content."""
+ parsed, error = _npm_pack_parse_package_json(raw)
+ if (error is not None) or (parsed is None):
+ return None, error
+
+ name, version, error = _npm_pack_extract_name_version(parsed)
+ if (error is not None) or (name is None) or (version is None):
+ return None, error
+
+ filename_match = _npm_pack_filename_match(filename_basename, name, version)
+ return NpmPackInfo(name=name, version=version, filename_match=filename_match), None
+
+
async def paths_recursive(base_path: pathlib.Path) -> AsyncGenerator[pathlib.Path]:
"""Yield all file paths recursively within a base path, relative to the base path."""
if (resolved_base_path := await is_dir_resolve(base_path)) is None:
@@ -1159,6 +1181,43 @@ def _generate_hexdump(data: bytes) -> str:
return "\n".join(hex_lines)
+def _npm_pack_extract_name_version(parsed: dict[str, Any]) -> tuple[str | None, str | None, str | None]:
+ name = parsed.get("name")
+ version = parsed.get("version")
+
+ if (not isinstance(name, str)) or (not name.strip()):
+ return None, None, "package/package.json missing or invalid 'name'"
+ if (not isinstance(version, str)) or (not version.strip()):
+ return None, None, "package/package.json missing or invalid 'version'"
+
+ return name.strip(), version.strip(), None
+
+
+def _npm_pack_filename_match(filename_basename: str | None, name: str, version: str) -> bool | None:
+ if not filename_basename:
+ return None
+ if "/" in name:
+ return None
+ return filename_basename == f"{name}-{version}"
+
+
+def _npm_pack_parse_package_json(raw: bytes) -> tuple[dict[str, Any] | None, str | None]:
+ try:
+ payload = raw.decode("utf-8")
+ except UnicodeDecodeError:
+ return None, "package/package.json is not valid UTF-8"
+
+ try:
+ parsed = json.loads(payload)
+ except json.JSONDecodeError as exc:
+ return None, f"package/package.json is not valid JSON: {exc}"
+
+ if not isinstance(parsed, dict):
+ return None, "package/package.json is not a JSON object"
+
+ return parsed, None
+
+
def _thread_messages_walk(node: dict[str, Any] | None, message_ids: set[str]) -> None:
if not isinstance(node, dict):
return
diff --git a/tests/unit/test_archive_root_variants.py b/tests/unit/test_archive_root_variants.py
index 5a7f496..36ec86b 100644
--- a/tests/unit/test_archive_root_variants.py
+++ b/tests/unit/test_archive_root_variants.py
@@ -16,6 +16,7 @@
# under the License.
import io
+import json
import pathlib
import tarfile
import zipfile
@@ -29,6 +30,33 @@ import atr.tasks.checks.zipformat as zipformat
import tests.unit.recorders as recorders
+@pytest.mark.asyncio
+async def test_targz_structure_accepts_npm_pack_root(tmp_path: pathlib.Path) -> None:
+ archive_path = tmp_path / "example-1.2.3.tgz"
+ _make_tar_gz_with_contents(
+ archive_path,
+ {
+ "package/package.json": json.dumps({"name": "example", "version": "1.2.3"}),
+ "package/README.txt": "hello",
+ },
+ )
+ recorder = recorders.RecorderStub(archive_path, "tests.unit.test_archive_root_variants")
+ args = checks.FunctionArguments(
+ recorder=recorders.get_recorder(recorder),
+ asf_uid="",
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ extra_args={},
+ )
+
+ await targz.structure(args)
+
+ assert any(status == sql.CheckResultStatus.SUCCESS.value for status, _, _ in recorder.messages)
+ assert not any(status == sql.CheckResultStatus.FAILURE.value for status, _, _ in recorder.messages)
+
+
@pytest.mark.asyncio
async def test_targz_structure_accepts_source_suffix_variant(tmp_path: pathlib.Path) -> None:
archive_path = tmp_path / "apache-example-1.2.3-source.tar.gz"
@@ -69,6 +97,31 @@ async def test_targz_structure_accepts_src_suffix_variant(tmp_path: pathlib.Path
assert any(status == sql.CheckResultStatus.SUCCESS.value for status, _, _ in recorder.messages)
+@pytest.mark.asyncio
+async def test_targz_structure_rejects_package_root_without_package_json(tmp_path: pathlib.Path) -> None:
+ archive_path = tmp_path / "example-1.2.3.tgz"
+ _make_tar_gz_with_contents(
+ archive_path,
+ {
+ "package/README.txt": "hello",
+ },
+ )
+ recorder = recorders.RecorderStub(archive_path, "tests.unit.test_archive_root_variants")
+ args = checks.FunctionArguments(
+ recorder=recorders.get_recorder(recorder),
+ asf_uid="",
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ extra_args={},
+ )
+
+ await targz.structure(args)
+
+ assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _ in recorder.messages)
+
+
@pytest.mark.asyncio
async def test_targz_structure_rejects_source_root_when_filename_has_no_suffix(tmp_path: pathlib.Path) -> None:
archive_path = tmp_path / "apache-example-1.2.3.tar.gz"
@@ -149,6 +202,50 @@ async def test_targz_structure_rejects_src_root_when_filename_has_source_suffix(
assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _ in recorder.messages)
+@pytest.mark.asyncio
+async def test_targz_structure_warns_on_npm_pack_filename_mismatch(tmp_path: pathlib.Path) -> None:
+ archive_path = tmp_path / "example-1.2.3.tgz"
+ _make_tar_gz_with_contents(
+ archive_path,
+ {
+ "package/package.json": json.dumps({"name": "different", "version": "1.2.3"}),
+ "package/README.txt": "hello",
+ },
+ )
+ recorder = recorders.RecorderStub(archive_path, "tests.unit.test_archive_root_variants")
+ args = checks.FunctionArguments(
+ recorder=recorders.get_recorder(recorder),
+ asf_uid="",
+ project_name="test",
+ version_name="test",
+ revision_number="00001",
+ primary_rel_path=None,
+ extra_args={},
+ )
+
+ await targz.structure(args)
+
+ assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _ in recorder.messages)
+ assert any("npm pack layout detected" in message for _, message, _ in recorder.messages)
+
+
+def test_zipformat_structure_accepts_npm_pack_root(tmp_path: pathlib.Path) -> None:
+ archive_path = tmp_path / "example-1.2.3.zip"
+ _make_zip_with_contents(
+ archive_path,
+ {
+ "package/package.json": json.dumps({"name": "example", "version": "1.2.3"}),
+ "package/README.txt": "hello",
+ },
+ )
+
+ result = zipformat._structure_check_core_logic(str(archive_path))
+
+ assert result.get("error") is None
+ assert result.get("warning") is None
+ assert result.get("root_dir") == "package"
+
+
def test_zipformat_structure_accepts_src_suffix_variant(tmp_path: pathlib.Path) -> None:
archive_path = tmp_path / "apache-example-1.2.3-src.zip"
_make_zip(archive_path, ["apache-example-1.2.3/README.txt"])
@@ -170,6 +267,38 @@ def test_zipformat_structure_rejects_dated_src_suffix(tmp_path: pathlib.Path) ->
assert "Root directory mismatch" in result["warning"]
+def test_zipformat_structure_rejects_package_root_without_package_json(tmp_path: pathlib.Path) -> None:
+ archive_path = tmp_path / "example-1.2.3.zip"
+ _make_zip_with_contents(
+ archive_path,
+ {
+ "package/README.txt": "hello",
+ },
+ )
+
+ result = zipformat._structure_check_core_logic(str(archive_path))
+
+ assert result.get("warning") is not None
+ assert "Root directory mismatch" in result["warning"]
+
+
+def test_zipformat_structure_warns_on_npm_pack_filename_mismatch(tmp_path: pathlib.Path) -> None:
+ archive_path = tmp_path / "example-1.2.3.zip"
+ _make_zip_with_contents(
+ archive_path,
+ {
+ "package/package.json": json.dumps({"name": "different", "version": "1.2.3"}),
+ "package/README.txt": "hello",
+ },
+ )
+
+ result = zipformat._structure_check_core_logic(str(archive_path))
+
+ assert result.get("warning") is not None
+ assert "npm pack layout detected" in result["warning"]
+ assert result.get("root_dir") == "package"
+
+
def _make_tar_gz(path: pathlib.Path, members: list[str]) -> None:
with tarfile.open(path, "w:gz") as tf:
for name in members:
@@ -179,7 +308,22 @@ def _make_tar_gz(path: pathlib.Path, members: list[str]) -> None:
tf.addfile(info, io.BytesIO(data))
+def _make_tar_gz_with_contents(path: pathlib.Path, members: dict[str, str]) -> None:
+ with tarfile.open(path, "w:gz") as tf:
+ for name, content in members.items():
+ data = content.encode()
+ info = tarfile.TarInfo(name=name)
+ info.size = len(data)
+ tf.addfile(info, io.BytesIO(data))
+
+
def _make_zip(path: pathlib.Path, members: list[str]) -> None:
with zipfile.ZipFile(path, "w") as zf:
for name in members:
zf.writestr(name, f"data-{name}")
+
+
+def _make_zip_with_contents(path: pathlib.Path, members: dict[str, str]) -> None:
+ with zipfile.ZipFile(path, "w") as zf:
+ for name, content in members.items():
+ zf.writestr(name, content)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]