This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/main by this push:
     new 99013e9  Use exclusions from release policies in lightweight license checks
99013e9 is described below

commit 99013e96d18dab83a5fc7cdcbf3432c505771678
Author: Sean B. Palmer <[email protected]>
AuthorDate: Fri Jan 9 19:06:58 2026 +0000

    Use exclusions from release policies in lightweight license checks
---
 atr/tasks/checks/license.py        | 209 ++++++++++++++++++-------------------
 atr/tasks/gha.py                   |   2 +-
 pyproject.toml                     |   3 +
 tests/unit/test_license_headers.py |  61 +++++++++++
 4 files changed, 169 insertions(+), 106 deletions(-)

diff --git a/atr/tasks/checks/license.py b/atr/tasks/checks/license.py
index fbde365..0e1a0c9 100644
--- a/atr/tasks/checks/license.py
+++ b/atr/tasks/checks/license.py
@@ -25,7 +25,6 @@ from collections.abc import Iterator
 from typing import Any, Final
 
 import atr.constants as constants
-import atr.db as db
 import atr.log as log
 import atr.models.results as results
 import atr.models.schema as schema
@@ -88,6 +87,7 @@ class ArtifactData(schema.Strict):
     files_with_valid_headers: int = schema.default(0)
     files_with_invalid_headers: int = schema.default(0)
     files_skipped: int = schema.default(0)
+    excludes_source: str = schema.default("none")
 
 
 class ArtifactResult(schema.Strict):
@@ -172,17 +172,18 @@ async def headers(args: checks.FunctionArguments) -> results.Results | None:
 
     log.info(f"Checking license headers for {artifact_abs_path} (rel: {args.primary_rel_path})")
 
-    async with db.session() as data:
-        release = await data.release(project_name=args.project_name, version=args.version_name).get()
-    ignore_lines = []
-    if release is not None:
-        release_directory_base = util.release_directory_base(release)
-        release_directory_revision = release_directory_base / args.revision_number
-        ignore_file = release_directory_revision / ".atr" / "license-headers-ignore"
-        if ignore_file.exists():
-            ignore_lines = ignore_file.read_text().splitlines()
+    is_source = await recorder.primary_path_is_source()
+    project = await recorder.project()
 
-    return await _headers_core(recorder, str(artifact_abs_path), ignore_lines)
+    ignore_lines: list[str] = []
+    excludes_source: str
+    if is_source:
+        ignore_lines = project.policy_source_excludes_lightweight
+        excludes_source = "policy" if ignore_lines else "none"
+    else:
+        excludes_source = "none"
+
+    return await _headers_core(recorder, str(artifact_abs_path), ignore_lines, excludes_source)
 
 
 def headers_validate(content: bytes, _filename: str) -> tuple[bool, str | None]:
@@ -213,9 +214,6 @@ def headers_validate(content: bytes, _filename: str) -> tuple[bool, str | None]:
     return False, "Could not find Apache License header"
 
 
-# File helpers
-
-
 def _files_check_core_logic(artifact_path: str, is_podling: bool) -> Iterator[Result]:
     """Verify that LICENSE and NOTICE files exist and are placed and formatted correctly."""
     license_results: dict[str, str | None] = {}
@@ -314,92 +312,6 @@ def _files_check_core_logic_notice(archive: tarzip.Archive, member: tarzip.Membe
     return len(issues) == 0, issues, preamble
 
 
-def _license_results(
-    license_results: dict[str, str | None],
-) -> Iterator[Result]:
-    """Build status messages for license file verification."""
-    license_files_size = len(license_results)
-    if license_files_size == 0:
-        yield ArtifactResult(
-            status=sql.CheckResultStatus.FAILURE,
-            message="No LICENSE file found",
-            data=None,
-        )
-        return
-
-    if license_files_size > 1:
-        yield ArtifactResult(
-            status=sql.CheckResultStatus.FAILURE,
-            message="Multiple LICENSE files found",
-            data=None,
-        )
-        return
-
-    for filename, license_diff in license_results.items():
-        # Unpack the single result by iterating
-        if license_diff is None:
-            yield ArtifactResult(
-                status=sql.CheckResultStatus.SUCCESS,
-                message=f"{filename} is valid",
-                data=None,
-            )
-        else:
-            yield ArtifactResult(
-                status=sql.CheckResultStatus.FAILURE,
-                message=f"{filename} is invalid",
-                data={"diff": license_diff},
-            )
-
-
-def _normal_whitespace(lines: list[str]) -> list[str]:
-    result = []
-    for line in lines:
-        line = line.strip()
-        if line:
-            result.append(line)
-    return result
-
-
-def _notice_results(
-    notice_results: dict[str, tuple[bool, list[str], str]],
-) -> Iterator[Result]:
-    """Build status messages for notice file verification."""
-    notice_files_size = len(notice_results)
-    if notice_files_size == 0:
-        yield ArtifactResult(
-            status=sql.CheckResultStatus.FAILURE,
-            message="No NOTICE file found",
-            data=None,
-        )
-        return
-
-    if notice_files_size > 1:
-        yield ArtifactResult(
-            status=sql.CheckResultStatus.FAILURE,
-            message="Multiple NOTICE files found",
-            data=None,
-        )
-        return
-
-    for filename, (notice_ok, notice_issues, notice_preamble) in notice_results.items():
-        # Unpack the single result by iterating
-        if notice_ok:
-            yield ArtifactResult(
-                status=sql.CheckResultStatus.SUCCESS,
-                message=f"{filename} is valid",
-                data=None,
-            )
-        else:
-            yield ArtifactResult(
-                status=sql.CheckResultStatus.FAILURE,
-                message=f"{filename} is invalid",
-                data={"issues": notice_issues, "preamble": notice_preamble},
-            )
-
-
-# Header helpers
-
-
 def _get_file_extension(filename: str) -> str | None:
     """Get the file extension without the dot."""
     _, ext = os.path.splitext(filename)
@@ -408,12 +320,12 @@ def _get_file_extension(filename: str) -> str | None:
     return ext[1:].lower()
 
 
-def _headers_check_core_logic(artifact_path: str, ignore_lines: list[str]) -> Iterator[Result]:
+def _headers_check_core_logic(artifact_path: str, ignore_lines: list[str], excludes_source: str) -> Iterator[Result]:
     """Verify Apache License headers in source files within an archive."""
     # We could modify @Lucas-C/pre-commit-hooks instead for this
     # But hopefully this will be robust enough, at least for testing
     # First find and validate the root directory
-    artifact_data = ArtifactData()
+    artifact_data = ArtifactData(excludes_source=excludes_source)
 
     # try:
     #     targz.root_directory(artifact_path)
@@ -464,7 +376,7 @@ def _headers_check_core_logic(artifact_path: str, ignore_lines: list[str]) -> It
         f" found {artifact_data.files_with_valid_headers} with valid headers,"
         f" {artifact_data.files_with_invalid_headers} with invalid headers,"
         f" and {artifact_data.files_skipped} skipped",
-        data=artifact_data.model_dump_json(),
+        data=artifact_data.model_dump(),
     )
 
 
@@ -541,9 +453,13 @@ def _headers_check_core_logic_should_check(filepath: str) -> bool:
     return False
 
 
-async def _headers_core(recorder: checks.Recorder, artifact_abs_path: str, ignore_lines: list[str]) -> None:
+async def _headers_core(
+    recorder: checks.Recorder, artifact_abs_path: str, ignore_lines: list[str], excludes_source: str
+) -> None:
     try:
-        for result in await asyncio.to_thread(_headers_check_core_logic, str(artifact_abs_path), ignore_lines):
+        for result in await asyncio.to_thread(
+            _headers_check_core_logic, str(artifact_abs_path), ignore_lines, excludes_source
+        ):
             match result:
                 case ArtifactResult():
                     await _record_artifact(recorder, result)
@@ -564,6 +480,89 @@ async def _headers_core(recorder: checks.Recorder, artifact_abs_path: str, ignor
     return None
 
 
+def _license_results(
+    license_results: dict[str, str | None],
+) -> Iterator[Result]:
+    """Build status messages for license file verification."""
+    license_files_size = len(license_results)
+    if license_files_size == 0:
+        yield ArtifactResult(
+            status=sql.CheckResultStatus.FAILURE,
+            message="No LICENSE file found",
+            data=None,
+        )
+        return
+
+    if license_files_size > 1:
+        yield ArtifactResult(
+            status=sql.CheckResultStatus.FAILURE,
+            message="Multiple LICENSE files found",
+            data=None,
+        )
+        return
+
+    for filename, license_diff in license_results.items():
+        # Unpack the single result by iterating
+        if license_diff is None:
+            yield ArtifactResult(
+                status=sql.CheckResultStatus.SUCCESS,
+                message=f"{filename} is valid",
+                data=None,
+            )
+        else:
+            yield ArtifactResult(
+                status=sql.CheckResultStatus.FAILURE,
+                message=f"{filename} is invalid",
+                data={"diff": license_diff},
+            )
+
+
+def _normal_whitespace(lines: list[str]) -> list[str]:
+    result = []
+    for line in lines:
+        line = line.strip()
+        if line:
+            result.append(line)
+    return result
+
+
+def _notice_results(
+    notice_results: dict[str, tuple[bool, list[str], str]],
+) -> Iterator[Result]:
+    """Build status messages for notice file verification."""
+    notice_files_size = len(notice_results)
+    if notice_files_size == 0:
+        yield ArtifactResult(
+            status=sql.CheckResultStatus.FAILURE,
+            message="No NOTICE file found",
+            data=None,
+        )
+        return
+
+    if notice_files_size > 1:
+        yield ArtifactResult(
+            status=sql.CheckResultStatus.FAILURE,
+            message="Multiple NOTICE files found",
+            data=None,
+        )
+        return
+
+    for filename, (notice_ok, notice_issues, notice_preamble) in notice_results.items():
+        # Unpack the single result by iterating
+        if notice_ok:
+            yield ArtifactResult(
+                status=sql.CheckResultStatus.SUCCESS,
+                message=f"{filename} is valid",
+                data=None,
+            )
+        else:
+            yield ArtifactResult(
+                status=sql.CheckResultStatus.FAILURE,
+                message=f"{filename} is invalid",
+                data={"issues": notice_issues, "preamble": notice_preamble},
+            )
+
+
 async def _record_artifact(recorder: checks.Recorder, result: ArtifactResult) -> None:
     match result.status:
         case sql.CheckResultStatus.SUCCESS:
diff --git a/atr/tasks/gha.py b/atr/tasks/gha.py
index 3c811da..f64b2cc 100644
--- a/atr/tasks/gha.py
+++ b/atr/tasks/gha.py
@@ -97,7 +97,7 @@ async def _find_triggered_run(
 
     def get_run(resp: dict[str, Any]) -> dict[str, Any] | None:
         return next(
-            (r for r in resp["workflow_runs"] if r["head_branch"] == args.ref and r["name"] == unique_id),
+            (r for r in resp["workflow_runs"] if (r["head_branch"] == args.ref) and (r["name"] == unique_id)),
             None,
         )
 
diff --git a/pyproject.toml b/pyproject.toml
index cdd80d5..ddd5023 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -111,6 +111,9 @@ minversion = "8.0"
 testpaths  = ["tests"]
 asyncio_mode = "auto"
 addopts = "--ignore=tests/e2e"
+filterwarnings = [
+  "ignore:imghdr was removed in Python 3.13:DeprecationWarning:pgpy.constants",
+]
 
 [tool.ruff]
 line-length = 120
diff --git a/tests/unit/test_license_headers.py b/tests/unit/test_license_headers.py
new file mode 100644
index 0000000..b1d8d6a
--- /dev/null
+++ b/tests/unit/test_license_headers.py
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pathlib
+
+import atr.tasks.checks.license as license
+
+TEST_ARCHIVE = pathlib.Path(__file__).parent.parent / "e2e" / "test_files" / "apache-test-0.2.tar.gz"
+
+
+def test_headers_check_data_fields_match_model():
+    results = list(license._headers_check_core_logic(str(TEST_ARCHIVE), [], "none"))
+    artifact_results = [r for r in results if isinstance(r, license.ArtifactResult)]
+    final_result = artifact_results[-1]
+    expected_fields = set(license.ArtifactData.model_fields.keys())
+    actual_fields = set(final_result.data.keys())
+    assert actual_fields == expected_fields
+
+
+def test_headers_check_excludes_matching_files():
+    results_without_excludes = list(license._headers_check_core_logic(str(TEST_ARCHIVE), [], "none"))
+    results_with_excludes = list(license._headers_check_core_logic(str(TEST_ARCHIVE), ["*.py"], "policy"))
+
+    def get_files_checked(results: list) -> int:
+        for r in results:
+            if isinstance(r, license.ArtifactResult) and r.data and ("files_checked" in r.data):
+                return r.data["files_checked"]
+        return 0
+
+    without_excludes = get_files_checked(results_without_excludes)
+    with_excludes = get_files_checked(results_with_excludes)
+    assert with_excludes < without_excludes
+
+
+def test_headers_check_includes_excludes_source_none():
+    results = list(license._headers_check_core_logic(str(TEST_ARCHIVE), [], "none"))
+    artifact_results = [r for r in results if isinstance(r, license.ArtifactResult)]
+    assert len(artifact_results) > 0
+    final_result = artifact_results[-1]
+    assert final_result.data["excludes_source"] == "none"
+
+
+def test_headers_check_includes_excludes_source_policy():
+    results = list(license._headers_check_core_logic(str(TEST_ARCHIVE), [], "policy"))
+    artifact_results = [r for r in results if isinstance(r, license.ArtifactResult)]
+    final_result = artifact_results[-1]
+    assert final_result.data["excludes_source"] == "policy"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to