This is an automated email from the ASF dual-hosted git repository. sbp pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
commit 3f201264506da4c62c9878f5fece046860e1591e Author: Sean B. Palmer <[email protected]> AuthorDate: Fri Jan 30 17:18:39 2026 +0000 Detect and allow package roots from npm pack output --- atr/tasks/checks/targz.py | 63 +++++++++++--- atr/tasks/checks/zipformat.py | 42 ++++++++- atr/util.py | 59 +++++++++++++ tests/unit/test_archive_root_variants.py | 144 +++++++++++++++++++++++++++++++ 4 files changed, 292 insertions(+), 16 deletions(-) diff --git a/atr/tasks/checks/targz.py b/atr/tasks/checks/targz.py index 4eae0fb..f3583b1 100644 --- a/atr/tasks/checks/targz.py +++ b/atr/tasks/checks/targz.py @@ -51,9 +51,10 @@ async def integrity(args: checks.FunctionArguments) -> results.Results | None: return None -def root_directory(tgz_path: str) -> str: - """Find the root directory in a tar archive and validate that it has only one root dir.""" +def root_directory(tgz_path: str) -> tuple[str, bytes | None]: # noqa: C901 + """Find root directory and extract package/package.json if found.""" root = None + package_json: bytes | None = None with tarzip.open_archive(tgz_path) as archive: for member in archive: @@ -63,18 +64,30 @@ def root_directory(tgz_path: str) -> str: parts = member.name.split("/", 1) if len(parts) >= 1: - if not root: + if root is None: root = parts[0] elif parts[0] != root: raise RootDirectoryError(f"Multiple root directories found: {root}, {parts[0]}") + if (root == "package") and (package_json is None): + member_name = member.name.lstrip("./") + if (member_name == "package/package.json") and member.isfile(): + size = member.size if hasattr(member, "size") else 0 + if (size > 0) and (size <= util.NPM_PACKAGE_JSON_MAX_SIZE): + f = archive.extractfile(member) + if f is not None: + try: + package_json = f.read() + finally: + f.close() + if not root: raise RootDirectoryError("No root directory found in archive") - return root + return root, package_json -async def structure(args: checks.FunctionArguments) -> results.Results | None: +async def structure(args: checks.FunctionArguments) -> results.Results | None: # noqa: C901 """Check the structure of a .tar.gz file.""" recorder = await args.recorder() if not (artifact_abs_path := await recorder.abs_path()): @@ -94,16 +107,42 @@ async def structure(args: checks.FunctionArguments) -> results.Results | None: ) try: - root = await asyncio.to_thread(root_directory, str(artifact_abs_path)) + root, package_json = await asyncio.to_thread(root_directory, str(artifact_abs_path)) + data: dict[str, object] = { + "root": root, + "basename_from_filename": basename_from_filename, + "expected_roots": expected_roots, + } if root in expected_roots: - await recorder.success( - "Archive contains exactly one root directory matching an expected name", - {"root": root, "basename_from_filename": basename_from_filename, "expected_roots": expected_roots}, - ) + await recorder.success("Archive contains exactly one root directory matching an expected name", data) + elif root == "package": + if package_json is not None: + npm_info, npm_error = util.parse_npm_pack_info(package_json, basename_from_filename) + if npm_info is not None: + data["npm_pack"] = { + "name": npm_info.name, + "version": npm_info.version, + "filename_match": npm_info.filename_match, + } + if npm_info.filename_match is False: + await recorder.warning( + "npm pack layout detected but filename does not match package.json", data + ) + else: + await recorder.success("npm pack layout detected, allowing package/ root", data) + else: + if npm_error is not None: + data["npm_pack_error"] = npm_error + await recorder.warning( + f"Root directory '{root}' does not match expected names '{expected_roots_display}'", data + ) + else: + await recorder.warning( + f"Root directory '{root}' does not match expected names '{expected_roots_display}'", data + ) else: await recorder.warning( - f"Root directory '{root}' does not match expected names '{expected_roots_display}'", - {"root": root, "basename_from_filename": basename_from_filename, "expected_roots": expected_roots}, + f"Root directory '{root}' does not match expected names '{expected_roots_display}'", data ) except tarzip.ArchiveMemberLimitExceededError as e: await recorder.failure(f"Archive has too many members: {e}", {"error": str(e)}) diff --git a/atr/tasks/checks/zipformat.py b/atr/tasks/checks/zipformat.py index 504de2f..6cfb0b5 100644 --- a/atr/tasks/checks/zipformat.py +++ b/atr/tasks/checks/zipformat.py @@ -92,11 +92,27 @@ def _integrity_check_core_logic(artifact_path: str) -> dict[str, Any]: return {"error": f"Unexpected error: {e}"} -def _structure_check_core_logic(artifact_path: str) -> dict[str, Any]: +def _structure_check_core_logic(artifact_path: str) -> dict[str, Any]: # noqa: C901 """Verify the internal structure of the zip archive.""" try: with tarzip.open_archive(artifact_path) as archive: - members: list[tarzip.Member] = list(archive) + members: list[tarzip.Member] = [] + package_json: bytes | None = None + + for member in archive: + members.append(member) + if package_json is None: + member_name = member.name.lstrip("./") + if (member_name == "package/package.json") and member.isfile(): + size = member.size if hasattr(member, "size") else 0 + if (size > 0) and (size <= util.NPM_PACKAGE_JSON_MAX_SIZE): + f = archive.extractfile(member) + if f is not None: + try: + package_json = f.read() + finally: + f.close() + if not members: return {"error": "Archive is empty"} @@ -110,7 +126,22 @@ def _structure_check_core_logic(artifact_path: str) -> dict[str, Any]: member_names, root_dirs, non_rooted_files, expected_roots ) - if error_msg: + if error_msg is not None: + if (actual_root == "package") and (package_json is not None): + npm_info, _ = util.parse_npm_pack_info(package_json, basename_from_filename) + if npm_info is not None: + npm_data: dict[str, Any] = { + "root_dir": actual_root, + "expected_roots": expected_roots, + "npm_pack": { + "name": npm_info.name, + "version": npm_info.version, + "filename_match": npm_info.filename_match, + }, + } + if npm_info.filename_match is False: + npm_data["warning"] = "npm pack layout detected but filename does not match package.json" + return npm_data result_data: dict[str, Any] = {"expected_roots": expected_roots} if error_msg.startswith("Root directory mismatch"): result_data["warning"] = error_msg @@ -157,7 +188,10 @@ def _structure_check_core_logic_validate_root( actual_root = next(iter(root_dirs)) if actual_root not in expected_roots: expected_roots_display = "', '".join(expected_roots) - return None, f"Root directory mismatch. Expected one of '{expected_roots_display}', found '{actual_root}'" + return ( + actual_root, + f"Root directory mismatch. Expected one of '{expected_roots_display}', found '{actual_root}'", + ) # Check whether all members are under the correct root directory expected_prefix = f"{actual_root.rstrip('/')}/" diff --git a/atr/util.py b/atr/util.py index 4b60e1a..b2d67f1 100644 --- a/atr/util.py +++ b/atr/util.py @@ -70,9 +70,17 @@ DEV_THREAD_URLS: Final[dict[str, str]] = { "CADL1oArKFcXvNb1MJfjN=10-yrfkxgpltrurdmm1r7ygatk...@mail.gmail.com": "https://lists.apache.org/thread/d7119h2qm7jrd5zsbp8ghkk0lpvnnxnw", "[email protected]": "https://lists.apache.org/thread/gzjd2jv7yod5sk5rgdf4x33g5l3fdf5o", } +NPM_PACKAGE_JSON_MAX_SIZE: Final[int] = 512 * 1024 USER_TESTS_ADDRESS: Final[str] = "[email protected]" [email protected](frozen=True) +class NpmPackInfo: + name: str + version: str + filename_match: bool | None + + class SshFingerprintError(ValueError): pass @@ -701,6 +709,20 @@ def parse_key_blocks_bytes(keys_data: bytes) -> list[str]: return key_blocks +def parse_npm_pack_info(raw: bytes, filename_basename: str | None = None) -> tuple[NpmPackInfo | None, str | None]: + """Parse npm pack info from package.json content.""" + parsed, error = _npm_pack_parse_package_json(raw) + if (error is not None) or (parsed is None): + return None, error + + name, version, error = _npm_pack_extract_name_version(parsed) + if (error is not None) or (name is None) or (version is None): + return None, error + + filename_match = _npm_pack_filename_match(filename_basename, name, version) + return NpmPackInfo(name=name, version=version, filename_match=filename_match), None + + async def paths_recursive(base_path: pathlib.Path) -> AsyncGenerator[pathlib.Path]: """Yield all file paths recursively within a base path, relative to the base path.""" if (resolved_base_path := await is_dir_resolve(base_path)) is None: @@ -1159,6 +1181,43 @@ def _generate_hexdump(data: bytes) -> str: return "\n".join(hex_lines) +def _npm_pack_extract_name_version(parsed: dict[str, Any]) -> tuple[str | None, str | None, str | None]: + name = parsed.get("name") + version = parsed.get("version") + + if (not isinstance(name, str)) or (not name.strip()): + return None, None, "package/package.json missing or invalid 'name'" + if (not isinstance(version, str)) or (not version.strip()): + return None, None, "package/package.json missing or invalid 'version'" + + return name.strip(), version.strip(), None + + +def _npm_pack_filename_match(filename_basename: str | None, name: str, version: str) -> bool | None: + if not filename_basename: + return None + if "/" in name: + return None + return filename_basename == f"{name}-{version}" + + +def _npm_pack_parse_package_json(raw: bytes) -> tuple[dict[str, Any] | None, str | None]: + try: + payload = raw.decode("utf-8") + except UnicodeDecodeError: + return None, "package/package.json is not valid UTF-8" + + try: + parsed = json.loads(payload) + except json.JSONDecodeError as exc: + return None, f"package/package.json is not valid JSON: {exc}" + + if not isinstance(parsed, dict): + return None, "package/package.json is not a JSON object" + + return parsed, None + + def _thread_messages_walk(node: dict[str, Any] | None, message_ids: set[str]) -> None: if not isinstance(node, dict): return diff --git a/tests/unit/test_archive_root_variants.py b/tests/unit/test_archive_root_variants.py index 5a7f496..36ec86b 100644 --- a/tests/unit/test_archive_root_variants.py +++ b/tests/unit/test_archive_root_variants.py @@ -16,6 +16,7 @@ # under the License. import io +import json import pathlib import tarfile import zipfile @@ -29,6 +30,33 @@ import atr.tasks.checks.zipformat as zipformat import tests.unit.recorders as recorders [email protected] +async def test_targz_structure_accepts_npm_pack_root(tmp_path: pathlib.Path) -> None: + archive_path = tmp_path / "example-1.2.3.tgz" + _make_tar_gz_with_contents( + archive_path, + { + "package/package.json": json.dumps({"name": "example", "version": "1.2.3"}), + "package/README.txt": "hello", + }, + ) + recorder = recorders.RecorderStub(archive_path, "tests.unit.test_archive_root_variants") + args = checks.FunctionArguments( + recorder=recorders.get_recorder(recorder), + asf_uid="", + project_name="test", + version_name="test", + revision_number="00001", + primary_rel_path=None, + extra_args={}, + ) + + await targz.structure(args) + + assert any(status == sql.CheckResultStatus.SUCCESS.value for status, _, _ in recorder.messages) + assert not any(status == sql.CheckResultStatus.FAILURE.value for status, _, _ in recorder.messages) + + @pytest.mark.asyncio async def test_targz_structure_accepts_source_suffix_variant(tmp_path: pathlib.Path) -> None: archive_path = tmp_path / "apache-example-1.2.3-source.tar.gz" @@ -69,6 +97,31 @@ async def test_targz_structure_accepts_src_suffix_variant(tmp_path: pathlib.Path assert any(status == sql.CheckResultStatus.SUCCESS.value for status, _, _ in recorder.messages) [email protected] +async def test_targz_structure_rejects_package_root_without_package_json(tmp_path: pathlib.Path) -> None: + archive_path = tmp_path / "example-1.2.3.tgz" + _make_tar_gz_with_contents( + archive_path, + { + "package/README.txt": "hello", + }, + ) + recorder = recorders.RecorderStub(archive_path, "tests.unit.test_archive_root_variants") + args = checks.FunctionArguments( + recorder=recorders.get_recorder(recorder), + asf_uid="", + project_name="test", + version_name="test", + revision_number="00001", + primary_rel_path=None, + extra_args={}, + ) + + await targz.structure(args) + + assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _ in recorder.messages) + + @pytest.mark.asyncio async def test_targz_structure_rejects_source_root_when_filename_has_no_suffix(tmp_path: pathlib.Path) -> None: archive_path = tmp_path / "apache-example-1.2.3.tar.gz" @@ -149,6 +202,50 @@ async def test_targz_structure_rejects_src_root_when_filename_has_source_suffix( assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _ in recorder.messages) [email protected] +async def test_targz_structure_warns_on_npm_pack_filename_mismatch(tmp_path: pathlib.Path) -> None: + archive_path = tmp_path / "example-1.2.3.tgz" + _make_tar_gz_with_contents( + archive_path, + { + "package/package.json": json.dumps({"name": "different", "version": "1.2.3"}), + "package/README.txt": "hello", + }, + ) + recorder = recorders.RecorderStub(archive_path, "tests.unit.test_archive_root_variants") + args = checks.FunctionArguments( + recorder=recorders.get_recorder(recorder), + asf_uid="", + project_name="test", + version_name="test", + revision_number="00001", + primary_rel_path=None, + extra_args={}, + ) + + await targz.structure(args) + + assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _ in recorder.messages) + assert any("npm pack layout detected" in message for _, message, _ in recorder.messages) + + +def test_zipformat_structure_accepts_npm_pack_root(tmp_path: pathlib.Path) -> None: + archive_path = tmp_path / "example-1.2.3.zip" + _make_zip_with_contents( + archive_path, + { + "package/package.json": json.dumps({"name": "example", "version": "1.2.3"}), + "package/README.txt": "hello", + }, + ) + + result = zipformat._structure_check_core_logic(str(archive_path)) + + assert result.get("error") is None + assert result.get("warning") is None + assert result.get("root_dir") == "package" + + def test_zipformat_structure_accepts_src_suffix_variant(tmp_path: pathlib.Path) -> None: archive_path = tmp_path / "apache-example-1.2.3-src.zip" _make_zip(archive_path, ["apache-example-1.2.3/README.txt"]) @@ -170,6 +267,38 @@ def test_zipformat_structure_rejects_dated_src_suffix(tmp_path: pathlib.Path) -> assert "Root directory mismatch" in result["warning"] +def test_zipformat_structure_rejects_package_root_without_package_json(tmp_path: pathlib.Path) -> None: + archive_path = tmp_path / "example-1.2.3.zip" + _make_zip_with_contents( + archive_path, + { + "package/README.txt": "hello", + }, + ) + + result = zipformat._structure_check_core_logic(str(archive_path)) + + assert result.get("warning") is not None + assert "Root directory mismatch" in result["warning"] + + +def test_zipformat_structure_warns_on_npm_pack_filename_mismatch(tmp_path: pathlib.Path) -> None: + archive_path = tmp_path / "example-1.2.3.zip" + _make_zip_with_contents( + archive_path, + { + "package/package.json": json.dumps({"name": "different", "version": "1.2.3"}), + "package/README.txt": "hello", + }, + ) + + result = zipformat._structure_check_core_logic(str(archive_path)) + + assert result.get("warning") is not None + assert "npm pack layout detected" in result["warning"] + assert result.get("root_dir") == "package" + + def _make_tar_gz(path: pathlib.Path, members: list[str]) -> None: with tarfile.open(path, "w:gz") as tf: for name in members: @@ -179,7 +308,22 @@ def _make_tar_gz(path: pathlib.Path, members: list[str]) -> None: tf.addfile(info, io.BytesIO(data)) +def _make_tar_gz_with_contents(path: pathlib.Path, members: dict[str, str]) -> None: + with tarfile.open(path, "w:gz") as tf: + for name, content in members.items(): + data = content.encode() + info = tarfile.TarInfo(name=name) + info.size = len(data) + tf.addfile(info, io.BytesIO(data)) + + def _make_zip(path: pathlib.Path, members: list[str]) -> None: with zipfile.ZipFile(path, "w") as zf: for name in members: zf.writestr(name, f"data-{name}") + + +def _make_zip_with_contents(path: pathlib.Path, members: dict[str, str]) -> None: + with zipfile.ZipFile(path, "w") as zf: + for name, content in members.items(): + zf.writestr(name, content) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
