This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/sbp by this push:
new 554490a0 Search for license and notice files recursively in binary archives
554490a0 is described below
commit 554490a06ff9bad706cffdb5aa50185b0ca8c478
Author: Sean B. Palmer <[email protected]>
AuthorDate: Wed Mar 18 16:11:01 2026 +0000
Search for license and notice files recursively in binary archives
---
atr/tasks/checks/license.py | 146 ++++++++++++++++++++++++++++----------
tests/unit/test_checks_license.py | 87 +++++++++++++++++++++--
2 files changed, 190 insertions(+), 43 deletions(-)
diff --git a/atr/tasks/checks/license.py b/atr/tasks/checks/license.py
index 722e160f..7946b0b2 100644
--- a/atr/tasks/checks/license.py
+++ b/atr/tasks/checks/license.py
@@ -81,9 +81,11 @@ INCLUDED_PATTERNS: Final[list[str]] = [
# Release policy fields which this check relies on - used for result caching
INPUT_POLICY_KEYS: Final[list[str]] = ["license_check_mode",
"source_excludes_lightweight"]
INPUT_EXTRA_ARGS: Final[list[str]] = ["is_podling"]
-CHECK_VERSION_FILES: Final[str] = "3"
+CHECK_VERSION_FILES: Final[str] = "4"
CHECK_VERSION_HEADERS: Final[str] = "3"
+_BINARY_LICENSE_FILENAMES: Final[frozenset[str]] = frozenset({"LICENSE",
"LICENSE.txt"})
+_BINARY_NOTICE_FILENAMES: Final[frozenset[str]] = frozenset({"NOTICE",
"NOTICE.txt"})
_MAX_LICENSE_NOTICE_SIZE: Final[int] = 1024 * 1024
# Types
@@ -154,7 +156,7 @@ async def files(args: checks.FunctionArguments) ->
results.Results | None:
log.info(f"Checking license files for {artifact_abs_path} (rel:
{args.primary_rel_path})")
try:
- for result in await asyncio.to_thread(_files_check_core_logic,
archive_dir, is_podling):
+ for result in await asyncio.to_thread(_files_check_core_logic,
archive_dir, is_podling, is_binary):
match result:
case ArtifactResult():
await _record_artifact(recorder, result)
@@ -239,12 +241,73 @@ def headers_validate(content: bytes, _filename: str) ->
tuple[bool, str | None]:
return False, "Could not find Apache License header"
-def _files_check_core_logic(archive_dir: pathlib.Path, is_podling: bool) ->
Iterator[Result]:
- """Verify that LICENSE and NOTICE files exist and are placed and formatted
correctly."""
- license_results: dict[str, str | None] = {}
- notice_results: dict[str, tuple[bool, list[str], str]] = {}
- disclaimer_found = False
+def _files_check_binary(root_path: pathlib.Path) -> Iterator[Result]:
+ license_paths: list[pathlib.Path] = []
+ notice_paths: list[pathlib.Path] = []
+
+ for dirpath, dirnames, filenames in os.walk(root_path):
+ dirnames[:] = sorted(d for d in dirnames if not d.startswith("._"))
+
+ for filename in sorted(filenames):
+ if filename.startswith("._"):
+ continue
+
+ if filename in _BINARY_LICENSE_FILENAMES:
+ license_paths.append(pathlib.Path(dirpath) / filename)
+
+ if filename in _BINARY_NOTICE_FILENAMES:
+ notice_paths.append(pathlib.Path(dirpath) / filename)
+
+ yield _files_check_binary_license(license_paths, root_path)
+ yield _files_check_binary_notice(notice_paths, root_path)
+
+def _files_check_binary_license(paths: list[pathlib.Path], root_path:
pathlib.Path) -> ArtifactResult:
+ if not paths:
+ return ArtifactResult(
+ status=sql.CheckResultStatus.BLOCKER,
+ message="No LICENSE or LICENSE.txt file found",
+ data=None,
+ )
+ for path in paths:
+ if _files_check_core_logic_license(path) is None:
+ rel_path = str(path.relative_to(root_path))
+ return ArtifactResult(
+ status=sql.CheckResultStatus.SUCCESS,
+ message=f"{rel_path} is valid",
+ data=None,
+ )
+ return ArtifactResult(
+ status=sql.CheckResultStatus.FAILURE,
+ message="No valid LICENSE or LICENSE.txt file found",
+ data=None,
+ )
+
+
+def _files_check_binary_notice(paths: list[pathlib.Path], root_path:
pathlib.Path) -> ArtifactResult:
+ if not paths:
+ return ArtifactResult(
+ status=sql.CheckResultStatus.BLOCKER,
+ message="No NOTICE or NOTICE.txt file found",
+ data=None,
+ )
+ for path in paths:
+ notice_ok, _, _ = _files_check_core_logic_notice(path)
+ if notice_ok:
+ rel_path = str(path.relative_to(root_path))
+ return ArtifactResult(
+ status=sql.CheckResultStatus.SUCCESS,
+ message=f"{rel_path} is valid",
+ data=None,
+ )
+ return ArtifactResult(
+ status=sql.CheckResultStatus.FAILURE,
+ message="No valid NOTICE or NOTICE.txt file found",
+ data=None,
+ )
+
+
+def _files_check_core_logic(archive_dir: pathlib.Path, is_podling: bool,
is_binary: bool) -> Iterator[Result]:
if not archive_dir.is_dir():
# Already protected by the caller
# We add it here again to make unit testing cleaner
@@ -265,37 +328,12 @@ def _files_check_core_logic(archive_dir: pathlib.Path,
is_podling: bool) -> Iter
data=None,
)
return
- root_path = archive_dir / root_dirs[0]
-
- for entry in sorted(os.listdir(root_path)):
- if entry.startswith("._"):
- # Metadata convention
- continue
-
- entry_path = root_path / entry
- if not entry_path.is_file():
- # Skip subdirectories
- continue
-
- if entry == "LICENSE":
- # TODO: Check length, should be 11,358 bytes
- license_diff = _files_check_core_logic_license(entry_path)
- license_results[entry] = license_diff
- elif entry == "NOTICE":
- # TODO: Check length doesn't exceed some preset
- notice_ok, notice_issues, notice_preamble =
_files_check_core_logic_notice(entry_path)
- notice_results[entry] = (notice_ok, notice_issues, notice_preamble)
- elif entry in {"DISCLAIMER", "DISCLAIMER-WIP"}:
- disclaimer_found = True
- yield from _license_results(license_results)
- yield from _notice_results(notice_results)
- if is_podling and (not disclaimer_found):
- yield ArtifactResult(
- status=sql.CheckResultStatus.BLOCKER,
- message="No DISCLAIMER or DISCLAIMER-WIP file found",
- data=None,
- )
+ root_path = archive_dir / root_dirs[0]
+ if is_binary:
+ yield from _files_check_binary(root_path)
+ else:
+ yield from _files_check_source(root_path, is_podling)
def _files_check_core_logic_license(file_path: pathlib.Path) -> str | None:
@@ -359,6 +397,40 @@ def _files_check_core_logic_notice(file_path:
pathlib.Path) -> tuple[bool, list[
return len(issues) == 0, issues, preamble
+def _files_check_source(root_path: pathlib.Path, is_podling: bool) ->
Iterator[Result]:
+ license_results: dict[str, str | None] = {}
+ notice_results: dict[str, tuple[bool, list[str], str]] = {}
+ disclaimer_found = False
+
+ for entry in sorted(os.listdir(root_path)):
+ if entry.startswith("._"):
+ continue
+
+ entry_path = root_path / entry
+ if not entry_path.is_file():
+ continue
+
+ if entry == "LICENSE":
+ # TODO: Check length, should be 11,358 bytes
+ license_diff = _files_check_core_logic_license(entry_path)
+ license_results[entry] = license_diff
+ elif entry == "NOTICE":
+ # TODO: Check length doesn't exceed some preset
+ notice_ok, notice_issues, notice_preamble =
_files_check_core_logic_notice(entry_path)
+ notice_results[entry] = (notice_ok, notice_issues, notice_preamble)
+ elif entry in {"DISCLAIMER", "DISCLAIMER-WIP"}:
+ disclaimer_found = True
+
+ yield from _license_results(license_results)
+ yield from _notice_results(notice_results)
+ if is_podling and (not disclaimer_found):
+ yield ArtifactResult(
+ status=sql.CheckResultStatus.BLOCKER,
+ message="No DISCLAIMER or DISCLAIMER-WIP file found",
+ data=None,
+ )
+
+
def _get_file_extension(filename: str) -> str | None:
"""Get the file extension without the dot."""
_, ext = os.path.splitext(filename)
diff --git a/tests/unit/test_checks_license.py b/tests/unit/test_checks_license.py
index e9057e12..d58b45c8 100644
--- a/tests/unit/test_checks_license.py
+++ b/tests/unit/test_checks_license.py
@@ -34,8 +34,68 @@ NOTICE_VALID: str = (
)
+def test_files_binary_license_notice_in_subdir(tmp_path):
+ cache_dir = _cache_with_root(tmp_path)
+ root = cache_dir / "apache-test-0.2"
+ meta_inf = root / "META-INF"
+ meta_inf.mkdir()
+ (meta_inf / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
+ (meta_inf / "NOTICE").write_text(NOTICE_VALID)
+ results = list(license._files_check_core_logic(cache_dir,
is_podling=False, is_binary=True))
+ artifact_results = [r for r in results if isinstance(r,
license.ArtifactResult)]
+ assert all(r.status == sql.CheckResultStatus.SUCCESS for r in
artifact_results)
+
+
+def test_files_binary_license_txt_notice_txt_nested(tmp_path):
+ cache_dir = _cache_with_root(tmp_path)
+ root = cache_dir / "apache-test-0.2"
+ nested = root / "lib" / "inner"
+ nested.mkdir(parents=True)
+ (nested / "LICENSE.txt").write_text(constants.APACHE_LICENSE_2_0)
+ (nested / "NOTICE.txt").write_text(NOTICE_VALID)
+ results = list(license._files_check_core_logic(cache_dir,
is_podling=False, is_binary=True))
+ artifact_results = [r for r in results if isinstance(r,
license.ArtifactResult)]
+ assert all(r.status == sql.CheckResultStatus.SUCCESS for r in
artifact_results)
+
+
+def test_files_binary_missing_license(tmp_path):
+ cache_dir = _cache_with_root(tmp_path)
+ root = cache_dir / "apache-test-0.2"
+ (root / "NOTICE").write_text(NOTICE_VALID)
+ results = list(license._files_check_core_logic(cache_dir,
is_podling=False, is_binary=True))
+ blockers = [
+ r for r in results if isinstance(r, license.ArtifactResult) and
(r.status == sql.CheckResultStatus.BLOCKER)
+ ]
+ assert any("LICENSE" in r.message for r in blockers)
+
+
+def test_files_binary_missing_notice(tmp_path):
+ cache_dir = _cache_with_root(tmp_path)
+ root = cache_dir / "apache-test-0.2"
+ (root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
+ results = list(license._files_check_core_logic(cache_dir,
is_podling=False, is_binary=True))
+ blockers = [
+ r for r in results if isinstance(r, license.ArtifactResult) and
(r.status == sql.CheckResultStatus.BLOCKER)
+ ]
+ assert any("NOTICE" in r.message for r in blockers)
+
+
+def test_files_binary_multiple_license_no_failure(tmp_path):
+ cache_dir = _cache_with_root(tmp_path)
+ root = cache_dir / "apache-test-0.2"
+ (root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
+ (root / "NOTICE").write_text(NOTICE_VALID)
+ meta_inf = root / "META-INF"
+ meta_inf.mkdir()
+ (meta_inf / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
+ (meta_inf / "NOTICE").write_text(NOTICE_VALID)
+ results = list(license._files_check_core_logic(cache_dir,
is_podling=False, is_binary=True))
+ artifact_results = [r for r in results if isinstance(r,
license.ArtifactResult)]
+ assert all(r.status == sql.CheckResultStatus.SUCCESS for r in
artifact_results)
+
+
def test_files_missing_cache_dir():
- results =
list(license._files_check_core_logic(pathlib.Path("/nonexistent"),
is_podling=False))
+ results =
list(license._files_check_core_logic(pathlib.Path("/nonexistent"),
is_podling=False, is_binary=False))
assert len(results) == 1
assert results[0].status == sql.CheckResultStatus.FAILURE
assert "not available" in results[0].message.lower()
@@ -46,7 +106,7 @@ def test_files_multiple_root_dirs(tmp_path):
cache_dir.mkdir()
(cache_dir / "root-a").mkdir()
(cache_dir / "root-b").mkdir()
- results = list(license._files_check_core_logic(cache_dir,
is_podling=False))
+ results = list(license._files_check_core_logic(cache_dir,
is_podling=False, is_binary=False))
assert len(results) >= 1
assert results[0].status == sql.CheckResultStatus.FAILURE
assert "root directory" in results[0].message.lower()
@@ -56,7 +116,7 @@ def test_files_no_root_dirs(tmp_path):
cache_dir = tmp_path / "cache"
cache_dir.mkdir()
(cache_dir / "LICENSE").write_text("stray file")
- results = list(license._files_check_core_logic(cache_dir,
is_podling=False))
+ results = list(license._files_check_core_logic(cache_dir,
is_podling=False, is_binary=False))
assert len(results) >= 1
assert results[0].status == sql.CheckResultStatus.FAILURE
assert "0" in results[0].message
@@ -67,7 +127,7 @@ def test_files_podling_without_disclaimer(tmp_path):
root = cache_dir / "apache-test-0.2"
(root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
(root / "NOTICE").write_text(NOTICE_VALID)
- results = list(license._files_check_core_logic(cache_dir, is_podling=True))
+ results = list(license._files_check_core_logic(cache_dir, is_podling=True,
is_binary=False))
assert any(isinstance(r, license.ArtifactResult) and (r.status ==
sql.CheckResultStatus.BLOCKER) for r in results)
@@ -78,17 +138,32 @@ def test_files_single_root_with_stray_top_level_file(tmp_path):
(root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
(root / "NOTICE").write_text(NOTICE_VALID)
(cache_dir / "stray.txt").write_text("ignored")
- results = list(license._files_check_core_logic(cache_dir,
is_podling=False))
+ results = list(license._files_check_core_logic(cache_dir,
is_podling=False, is_binary=False))
statuses = [r.status for r in results if isinstance(r,
license.ArtifactResult)]
assert sql.CheckResultStatus.SUCCESS in statuses
+def test_files_source_nested_license_notice_ignored(tmp_path):
+ cache_dir = _cache_with_root(tmp_path)
+ root = cache_dir / "apache-test-0.2"
+ meta_inf = root / "META-INF"
+ meta_inf.mkdir()
+ (meta_inf / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
+ (meta_inf / "NOTICE").write_text(NOTICE_VALID)
+ results = list(license._files_check_core_logic(cache_dir,
is_podling=False, is_binary=False))
+ blockers = [
+ r for r in results if isinstance(r, license.ArtifactResult) and
(r.status == sql.CheckResultStatus.BLOCKER)
+ ]
+ assert any("LICENSE" in r.message for r in blockers)
+ assert any("NOTICE" in r.message for r in blockers)
+
+
def test_files_valid_license_and_notice(tmp_path):
cache_dir = _cache_with_root(tmp_path)
root = cache_dir / "apache-test-0.2"
(root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
(root / "NOTICE").write_text(NOTICE_VALID)
- results = list(license._files_check_core_logic(cache_dir,
is_podling=False))
+ results = list(license._files_check_core_logic(cache_dir,
is_podling=False, is_binary=False))
artifact_results = [r for r in results if isinstance(r,
license.ArtifactResult)]
assert all(r.status == sql.CheckResultStatus.SUCCESS for r in
artifact_results)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]