This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/sbp by this push:
     new 554490a0 Search for license and notice files recursively in binary archives
554490a0 is described below

commit 554490a06ff9bad706cffdb5aa50185b0ca8c478
Author: Sean B. Palmer <[email protected]>
AuthorDate: Wed Mar 18 16:11:01 2026 +0000

    Search for license and notice files recursively in binary archives
---
 atr/tasks/checks/license.py       | 146 ++++++++++++++++++++++++++++----------
 tests/unit/test_checks_license.py |  87 +++++++++++++++++++++--
 2 files changed, 190 insertions(+), 43 deletions(-)

diff --git a/atr/tasks/checks/license.py b/atr/tasks/checks/license.py
index 722e160f..7946b0b2 100644
--- a/atr/tasks/checks/license.py
+++ b/atr/tasks/checks/license.py
@@ -81,9 +81,11 @@ INCLUDED_PATTERNS: Final[list[str]] = [
 # Release policy fields which this check relies on - used for result caching
 INPUT_POLICY_KEYS: Final[list[str]] = ["license_check_mode", 
"source_excludes_lightweight"]
 INPUT_EXTRA_ARGS: Final[list[str]] = ["is_podling"]
-CHECK_VERSION_FILES: Final[str] = "3"
+CHECK_VERSION_FILES: Final[str] = "4"
 CHECK_VERSION_HEADERS: Final[str] = "3"
 
+_BINARY_LICENSE_FILENAMES: Final[frozenset[str]] = frozenset({"LICENSE", 
"LICENSE.txt"})
+_BINARY_NOTICE_FILENAMES: Final[frozenset[str]] = frozenset({"NOTICE", 
"NOTICE.txt"})
 _MAX_LICENSE_NOTICE_SIZE: Final[int] = 1024 * 1024
 
 # Types
@@ -154,7 +156,7 @@ async def files(args: checks.FunctionArguments) -> 
results.Results | None:
     log.info(f"Checking license files for {artifact_abs_path} (rel: 
{args.primary_rel_path})")
 
     try:
-        for result in await asyncio.to_thread(_files_check_core_logic, 
archive_dir, is_podling):
+        for result in await asyncio.to_thread(_files_check_core_logic, 
archive_dir, is_podling, is_binary):
             match result:
                 case ArtifactResult():
                     await _record_artifact(recorder, result)
@@ -239,12 +241,73 @@ def headers_validate(content: bytes, _filename: str) -> 
tuple[bool, str | None]:
     return False, "Could not find Apache License header"
 
 
-def _files_check_core_logic(archive_dir: pathlib.Path, is_podling: bool) -> 
Iterator[Result]:
-    """Verify that LICENSE and NOTICE files exist and are placed and formatted 
correctly."""
-    license_results: dict[str, str | None] = {}
-    notice_results: dict[str, tuple[bool, list[str], str]] = {}
-    disclaimer_found = False
+def _files_check_binary(root_path: pathlib.Path) -> Iterator[Result]:
+    license_paths: list[pathlib.Path] = []
+    notice_paths: list[pathlib.Path] = []
+
+    for dirpath, dirnames, filenames in os.walk(root_path):
+        dirnames[:] = sorted(d for d in dirnames if not d.startswith("._"))
+
+        for filename in sorted(filenames):
+            if filename.startswith("._"):
+                continue
+
+            if filename in _BINARY_LICENSE_FILENAMES:
+                license_paths.append(pathlib.Path(dirpath) / filename)
+
+            if filename in _BINARY_NOTICE_FILENAMES:
+                notice_paths.append(pathlib.Path(dirpath) / filename)
+
+    yield _files_check_binary_license(license_paths, root_path)
+    yield _files_check_binary_notice(notice_paths, root_path)
+
 
+def _files_check_binary_license(paths: list[pathlib.Path], root_path: 
pathlib.Path) -> ArtifactResult:
+    if not paths:
+        return ArtifactResult(
+            status=sql.CheckResultStatus.BLOCKER,
+            message="No LICENSE or LICENSE.txt file found",
+            data=None,
+        )
+    for path in paths:
+        if _files_check_core_logic_license(path) is None:
+            rel_path = str(path.relative_to(root_path))
+            return ArtifactResult(
+                status=sql.CheckResultStatus.SUCCESS,
+                message=f"{rel_path} is valid",
+                data=None,
+            )
+    return ArtifactResult(
+        status=sql.CheckResultStatus.FAILURE,
+        message="No valid LICENSE or LICENSE.txt file found",
+        data=None,
+    )
+
+
+def _files_check_binary_notice(paths: list[pathlib.Path], root_path: 
pathlib.Path) -> ArtifactResult:
+    if not paths:
+        return ArtifactResult(
+            status=sql.CheckResultStatus.BLOCKER,
+            message="No NOTICE or NOTICE.txt file found",
+            data=None,
+        )
+    for path in paths:
+        notice_ok, _, _ = _files_check_core_logic_notice(path)
+        if notice_ok:
+            rel_path = str(path.relative_to(root_path))
+            return ArtifactResult(
+                status=sql.CheckResultStatus.SUCCESS,
+                message=f"{rel_path} is valid",
+                data=None,
+            )
+    return ArtifactResult(
+        status=sql.CheckResultStatus.FAILURE,
+        message="No valid NOTICE or NOTICE.txt file found",
+        data=None,
+    )
+
+
+def _files_check_core_logic(archive_dir: pathlib.Path, is_podling: bool, 
is_binary: bool) -> Iterator[Result]:
     if not archive_dir.is_dir():
         # Already protected by the caller
         # We add it here again to make unit testing cleaner
@@ -265,37 +328,12 @@ def _files_check_core_logic(archive_dir: pathlib.Path, 
is_podling: bool) -> Iter
             data=None,
         )
         return
-    root_path = archive_dir / root_dirs[0]
-
-    for entry in sorted(os.listdir(root_path)):
-        if entry.startswith("._"):
-            # Metadata convention
-            continue
-
-        entry_path = root_path / entry
-        if not entry_path.is_file():
-            # Skip subdirectories
-            continue
-
-        if entry == "LICENSE":
-            # TODO: Check length, should be 11,358 bytes
-            license_diff = _files_check_core_logic_license(entry_path)
-            license_results[entry] = license_diff
-        elif entry == "NOTICE":
-            # TODO: Check length doesn't exceed some preset
-            notice_ok, notice_issues, notice_preamble = 
_files_check_core_logic_notice(entry_path)
-            notice_results[entry] = (notice_ok, notice_issues, notice_preamble)
-        elif entry in {"DISCLAIMER", "DISCLAIMER-WIP"}:
-            disclaimer_found = True
 
-    yield from _license_results(license_results)
-    yield from _notice_results(notice_results)
-    if is_podling and (not disclaimer_found):
-        yield ArtifactResult(
-            status=sql.CheckResultStatus.BLOCKER,
-            message="No DISCLAIMER or DISCLAIMER-WIP file found",
-            data=None,
-        )
+    root_path = archive_dir / root_dirs[0]
+    if is_binary:
+        yield from _files_check_binary(root_path)
+    else:
+        yield from _files_check_source(root_path, is_podling)
 
 
 def _files_check_core_logic_license(file_path: pathlib.Path) -> str | None:
@@ -359,6 +397,40 @@ def _files_check_core_logic_notice(file_path: 
pathlib.Path) -> tuple[bool, list[
     return len(issues) == 0, issues, preamble
 
 
+def _files_check_source(root_path: pathlib.Path, is_podling: bool) -> 
Iterator[Result]:
+    license_results: dict[str, str | None] = {}
+    notice_results: dict[str, tuple[bool, list[str], str]] = {}
+    disclaimer_found = False
+
+    for entry in sorted(os.listdir(root_path)):
+        if entry.startswith("._"):
+            continue
+
+        entry_path = root_path / entry
+        if not entry_path.is_file():
+            continue
+
+        if entry == "LICENSE":
+            # TODO: Check length, should be 11,358 bytes
+            license_diff = _files_check_core_logic_license(entry_path)
+            license_results[entry] = license_diff
+        elif entry == "NOTICE":
+            # TODO: Check length doesn't exceed some preset
+            notice_ok, notice_issues, notice_preamble = 
_files_check_core_logic_notice(entry_path)
+            notice_results[entry] = (notice_ok, notice_issues, notice_preamble)
+        elif entry in {"DISCLAIMER", "DISCLAIMER-WIP"}:
+            disclaimer_found = True
+
+    yield from _license_results(license_results)
+    yield from _notice_results(notice_results)
+    if is_podling and (not disclaimer_found):
+        yield ArtifactResult(
+            status=sql.CheckResultStatus.BLOCKER,
+            message="No DISCLAIMER or DISCLAIMER-WIP file found",
+            data=None,
+        )
+
+
 def _get_file_extension(filename: str) -> str | None:
     """Get the file extension without the dot."""
     _, ext = os.path.splitext(filename)
diff --git a/tests/unit/test_checks_license.py b/tests/unit/test_checks_license.py
index e9057e12..d58b45c8 100644
--- a/tests/unit/test_checks_license.py
+++ b/tests/unit/test_checks_license.py
@@ -34,8 +34,68 @@ NOTICE_VALID: str = (
 )
 
 
+def test_files_binary_license_notice_in_subdir(tmp_path):
+    cache_dir = _cache_with_root(tmp_path)
+    root = cache_dir / "apache-test-0.2"
+    meta_inf = root / "META-INF"
+    meta_inf.mkdir()
+    (meta_inf / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
+    (meta_inf / "NOTICE").write_text(NOTICE_VALID)
+    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False, is_binary=True))
+    artifact_results = [r for r in results if isinstance(r, 
license.ArtifactResult)]
+    assert all(r.status == sql.CheckResultStatus.SUCCESS for r in 
artifact_results)
+
+
+def test_files_binary_license_txt_notice_txt_nested(tmp_path):
+    cache_dir = _cache_with_root(tmp_path)
+    root = cache_dir / "apache-test-0.2"
+    nested = root / "lib" / "inner"
+    nested.mkdir(parents=True)
+    (nested / "LICENSE.txt").write_text(constants.APACHE_LICENSE_2_0)
+    (nested / "NOTICE.txt").write_text(NOTICE_VALID)
+    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False, is_binary=True))
+    artifact_results = [r for r in results if isinstance(r, 
license.ArtifactResult)]
+    assert all(r.status == sql.CheckResultStatus.SUCCESS for r in 
artifact_results)
+
+
+def test_files_binary_missing_license(tmp_path):
+    cache_dir = _cache_with_root(tmp_path)
+    root = cache_dir / "apache-test-0.2"
+    (root / "NOTICE").write_text(NOTICE_VALID)
+    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False, is_binary=True))
+    blockers = [
+        r for r in results if isinstance(r, license.ArtifactResult) and 
(r.status == sql.CheckResultStatus.BLOCKER)
+    ]
+    assert any("LICENSE" in r.message for r in blockers)
+
+
+def test_files_binary_missing_notice(tmp_path):
+    cache_dir = _cache_with_root(tmp_path)
+    root = cache_dir / "apache-test-0.2"
+    (root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
+    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False, is_binary=True))
+    blockers = [
+        r for r in results if isinstance(r, license.ArtifactResult) and 
(r.status == sql.CheckResultStatus.BLOCKER)
+    ]
+    assert any("NOTICE" in r.message for r in blockers)
+
+
+def test_files_binary_multiple_license_no_failure(tmp_path):
+    cache_dir = _cache_with_root(tmp_path)
+    root = cache_dir / "apache-test-0.2"
+    (root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
+    (root / "NOTICE").write_text(NOTICE_VALID)
+    meta_inf = root / "META-INF"
+    meta_inf.mkdir()
+    (meta_inf / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
+    (meta_inf / "NOTICE").write_text(NOTICE_VALID)
+    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False, is_binary=True))
+    artifact_results = [r for r in results if isinstance(r, 
license.ArtifactResult)]
+    assert all(r.status == sql.CheckResultStatus.SUCCESS for r in 
artifact_results)
+
+
 def test_files_missing_cache_dir():
-    results = 
list(license._files_check_core_logic(pathlib.Path("/nonexistent"), 
is_podling=False))
+    results = 
list(license._files_check_core_logic(pathlib.Path("/nonexistent"), 
is_podling=False, is_binary=False))
     assert len(results) == 1
     assert results[0].status == sql.CheckResultStatus.FAILURE
     assert "not available" in results[0].message.lower()
@@ -46,7 +106,7 @@ def test_files_multiple_root_dirs(tmp_path):
     cache_dir.mkdir()
     (cache_dir / "root-a").mkdir()
     (cache_dir / "root-b").mkdir()
-    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False))
+    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False, is_binary=False))
     assert len(results) >= 1
     assert results[0].status == sql.CheckResultStatus.FAILURE
     assert "root directory" in results[0].message.lower()
@@ -56,7 +116,7 @@ def test_files_no_root_dirs(tmp_path):
     cache_dir = tmp_path / "cache"
     cache_dir.mkdir()
     (cache_dir / "LICENSE").write_text("stray file")
-    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False))
+    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False, is_binary=False))
     assert len(results) >= 1
     assert results[0].status == sql.CheckResultStatus.FAILURE
     assert "0" in results[0].message
@@ -67,7 +127,7 @@ def test_files_podling_without_disclaimer(tmp_path):
     root = cache_dir / "apache-test-0.2"
     (root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
     (root / "NOTICE").write_text(NOTICE_VALID)
-    results = list(license._files_check_core_logic(cache_dir, is_podling=True))
+    results = list(license._files_check_core_logic(cache_dir, is_podling=True, 
is_binary=False))
     assert any(isinstance(r, license.ArtifactResult) and (r.status == 
sql.CheckResultStatus.BLOCKER) for r in results)
 
 
@@ -78,17 +138,32 @@ def 
test_files_single_root_with_stray_top_level_file(tmp_path):
     (root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
     (root / "NOTICE").write_text(NOTICE_VALID)
     (cache_dir / "stray.txt").write_text("ignored")
-    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False))
+    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False, is_binary=False))
     statuses = [r.status for r in results if isinstance(r, 
license.ArtifactResult)]
     assert sql.CheckResultStatus.SUCCESS in statuses
 
 
+def test_files_source_nested_license_notice_ignored(tmp_path):
+    cache_dir = _cache_with_root(tmp_path)
+    root = cache_dir / "apache-test-0.2"
+    meta_inf = root / "META-INF"
+    meta_inf.mkdir()
+    (meta_inf / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
+    (meta_inf / "NOTICE").write_text(NOTICE_VALID)
+    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False, is_binary=False))
+    blockers = [
+        r for r in results if isinstance(r, license.ArtifactResult) and 
(r.status == sql.CheckResultStatus.BLOCKER)
+    ]
+    assert any("LICENSE" in r.message for r in blockers)
+    assert any("NOTICE" in r.message for r in blockers)
+
+
 def test_files_valid_license_and_notice(tmp_path):
     cache_dir = _cache_with_root(tmp_path)
     root = cache_dir / "apache-test-0.2"
     (root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
     (root / "NOTICE").write_text(NOTICE_VALID)
-    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False))
+    results = list(license._files_check_core_logic(cache_dir, 
is_podling=False, is_binary=False))
     artifact_results = [r for r in results if isinstance(r, 
license.ArtifactResult)]
     assert all(r.status == sql.CheckResultStatus.SUCCESS for r in 
artifact_results)
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to