(tooling-trusted-releases) 01/01: Use exclusions from release policies in RAT checks

sbp Fri, 09 Jan 2026 12:12:14 -0800

This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


commit 702b4b3c93e584a92617ffb8980db315d1906f25
Author: Sean B. Palmer <[email protected]>
AuthorDate: Fri Jan 9 20:06:37 2026 +0000

    Use exclusions from release policies in RAT checks
---
 .github/workflows/build.yml                        |  2 +-
 atr/archives.py                                    |  2 +-
 atr/tasks/checks/rat.py                            | 95 +++++++++++++++++-----
 ...t_license_headers.py => test_checks_license.py} |  0
 tests/unit/test_checks_rat.py                      | 53 ++++++++++++
 5 files changed, 130 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 70a474c..bcec85f 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -45,7 +45,7 @@ jobs:
 
       - name: Run unit tests
         run: |
-          uv run --no-sync pytest -rs tests
+          uv run --frozen pytest -rs tests
 
       - name: Run Playwright end-to-end tests
         run: |
diff --git a/atr/archives.py b/atr/archives.py
index 2560c95..0f87d88 100644
--- a/atr/archives.py
+++ b/atr/archives.py
@@ -119,7 +119,7 @@ def _archive_extract_member(  # noqa: C901
         if _safe_path(extract_dir, member.name) is None:
             log.warning(f"Skipping potentially unsafe path: {member.name}")
             return 0, extracted_paths
-        tf.extract(member, extract_dir, numeric_owner=True)
+        tf.extract(member, extract_dir, numeric_owner=True, filter="fully_trusted")
 
     elif member.isreg():
         extracted_size = _archive_extract_safe_process_file(
diff --git a/atr/tasks/checks/rat.py b/atr/tasks/checks/rat.py
index 6988367..bbe4bce 100644
--- a/atr/tasks/checks/rat.py
+++ b/atr/tasks/checks/rat.py
@@ -47,6 +47,9 @@ _JAVA_MEMORY_ARGS: Final[list[str]] = []
 # Generated file patterns, always excluded
 _GENERATED_FILE_PATTERNS: Final[list[str]] = [f"**/*{s}" for s in constants.GENERATED_FILE_SUFFIXES]
 
+# The name of the temp file for excludes defined in release policies
+_POLICY_EXCLUDES_FILENAME: Final[str] = ".atr-rat-excludes"
+
 # The name of the file that contains the exclusions for the specified archive
 _RAT_EXCLUDES_FILENAME: Final[str] = ".rat-excludes"
 
@@ -86,8 +89,11 @@ async def check(args: checks.FunctionArguments) -> results.Results | None:
 
     log.info(f"Checking RAT licenses for {artifact_abs_path} (rel: {args.primary_rel_path})")
 
+    is_source = await recorder.primary_path_is_source()
+    policy_excludes = project.policy_source_excludes_rat if is_source else []
+
     try:
-        await _check_core(args, recorder, artifact_abs_path)
+        await _check_core(args, recorder, artifact_abs_path, policy_excludes)
     except Exception as e:
         # TODO: Or bubble for task failure?
         await recorder.failure("Error running Apache RAT check", {"error": str(e)})
@@ -98,7 +104,8 @@ async def check(args: checks.FunctionArguments) -> results.Results | None:
 def _build_rat_command(
     rat_jar_path: str,
     xml_output_path: str,
-    exclude_file: str | None,
+    excludes_file: str | None,
+    apply_extended_std: bool,
 ) -> list[str]:
     """Build the RAT command with appropriate exclusions."""
     command = [
@@ -121,7 +128,7 @@ def _build_rat_command(
     for std in _STD_EXCLUSIONS_ALWAYS:
         command.extend(["--input-exclude-std", std])
 
-    if exclude_file is None:
+    if apply_extended_std:
         for std in _STD_EXCLUSIONS_EXTENDED:
             command.extend(["--input-exclude-std", std])
 
@@ -129,10 +136,11 @@ def _build_rat_command(
         command.extend(["--input-exclude", pattern])
 
     command.extend(["--input-exclude", _RAT_REPORT_FILENAME])
+    command.extend(["--input-exclude", _POLICY_EXCLUDES_FILENAME])
 
-    if exclude_file is not None:
+    if excludes_file is not None:
         command.extend(["--input-exclude", _RAT_EXCLUDES_FILENAME])
-        command.extend(["--input-exclude-file", exclude_file])
+        command.extend(["--input-exclude-file", excludes_file])
 
     command.extend(["--", "."])
 
@@ -140,11 +148,15 @@ def _build_rat_command(
 
 
 async def _check_core(
-    args: checks.FunctionArguments, recorder: checks.Recorder, artifact_abs_path: pathlib.Path
+    args: checks.FunctionArguments,
+    recorder: checks.Recorder,
+    artifact_abs_path: pathlib.Path,
+    policy_excludes: list[str],
 ) -> None:
     result_data = await asyncio.to_thread(
         _check_core_logic,
         artifact_path=str(artifact_abs_path),
+        policy_excludes=policy_excludes,
         rat_jar_path=args.extra_args.get("rat_jar_path", _CONFIG.APACHE_RAT_JAR_PATH),
         max_extract_size=args.extra_args.get("max_extract_size", _CONFIG.MAX_EXTRACT_SIZE),
         chunk_size=args.extra_args.get("chunk_size", _CONFIG.EXTRACT_CHUNK_SIZE),
@@ -187,6 +199,7 @@ async def _check_core(
 
 def _check_core_logic(  # noqa: C901
     artifact_path: str,
+    policy_excludes: list[str],
     rat_jar_path: str = _CONFIG.APACHE_RAT_JAR_PATH,
     max_extract_size: int = _CONFIG.MAX_EXTRACT_SIZE,
     chunk_size: int = _CONFIG.EXTRACT_CHUNK_SIZE,
@@ -256,14 +269,35 @@ def _check_core_logic(  # noqa: C901
                     "unapproved_files": [],
                     "unknown_license_files": [],
                     "errors": [f"Found {len(exclude_file_paths)} {_RAT_EXCLUDES_FILENAME} files"],
+                    "excludes_source": "unknown",
                 }
 
             # Narrow to single path after validation
-            exclude_file_path: str | None = exclude_file_paths[0] if exclude_file_paths else None
+            archive_excludes_path: str | None = exclude_file_paths[0] if exclude_file_paths else None
+
+            # Determine excludes_source and effective excludes file
+            excludes_source: str
+            effective_excludes_path: str | None
+
+            if archive_excludes_path is not None:
+                excludes_source = "archive"
+                effective_excludes_path = archive_excludes_path
+                log.info(f"Using archive {_RAT_EXCLUDES_FILENAME}: {archive_excludes_path}")
+            elif policy_excludes:
+                excludes_source = "policy"
+                policy_excludes_file = os.path.join(temp_dir, _POLICY_EXCLUDES_FILENAME)
+                with open(policy_excludes_file, "w") as f:
+                    f.write("\n".join(policy_excludes))
+                effective_excludes_path = os.path.relpath(policy_excludes_file, temp_dir)
+                log.info(f"Using policy excludes written to: {policy_excludes_file}")
+            else:
+                excludes_source = "none"
+                effective_excludes_path = None
+                log.info("No excludes: using defaults only")
 
-            # Determine scan root
-            if exclude_file_path is not None:
-                scan_root = os.path.dirname(os.path.join(temp_dir, exclude_file_path))
+            # Determine scan root based on archive .rat-excludes location
+            if archive_excludes_path is not None:
+                scan_root = os.path.dirname(os.path.join(temp_dir, archive_excludes_path))
 
                 # Verify that scan_root is inside temp_dir
                 abs_scan_root = os.path.abspath(scan_root)
@@ -281,6 +315,7 @@ def _check_core_logic(  # noqa: C901
                         "unapproved_files": [],
                         "unknown_license_files": [],
                         "errors": ["Exclusion file path escapes extraction directory"],
+                        "excludes_source": excludes_source,
                     }
 
                 log.info(f"Using {_RAT_EXCLUDES_FILENAME} directory as scan root: {scan_root}")
@@ -298,16 +333,21 @@ def _check_core_logic(  # noqa: C901
                         "unapproved_files": [],
                         "unknown_license_files": [],
                         "errors": [f"{untracked_count} file(s) outside {_RAT_EXCLUDES_FILENAME} directory"],
+                        "excludes_source": excludes_source,
                     }
             else:
                 scan_root = temp_dir
-                log.info(f"No {_RAT_EXCLUDES_FILENAME} found, using temp_dir as scan root: {scan_root}")
+                log.info(f"No archive {_RAT_EXCLUDES_FILENAME} found, using temp_dir as scan root: {scan_root}")
 
             # Execute RAT and get results or error
+            # Extended std exclusions apply when there's no archive .rat-excludes
+            apply_extended_std = excludes_source != "archive"
             error_result, xml_output_path = _check_core_logic_execute_rat(
-                rat_jar_path, scan_root, temp_dir, exclude_file_path
+                rat_jar_path, scan_root, temp_dir, effective_excludes_path, apply_extended_std, excludes_source
             )
             if error_result:
+                error_result["excludes_source"] = excludes_source
+                error_result["extended_std_applied"] = apply_extended_std
                 return error_result
 
             # Parse the XML output
@@ -335,6 +375,8 @@ def _check_core_logic(  # noqa: C901
                         os.path.normpath(file["name"]),
                     )
 
+            results["excludes_source"] = excludes_source
+            results["extended_std_applied"] = apply_extended_std
             return results
 
     except Exception as e:
@@ -351,25 +393,31 @@ def _check_core_logic(  # noqa: C901
             "unapproved_files": [],
             "unknown_license_files": [],
             "errors": [str(e), traceback.format_exc()],
+            "excludes_source": "unknown",
         }
 
 
 def _check_core_logic_execute_rat(
-    rat_jar_path: str, scan_root: str, temp_dir: str, exclude_file_path: str | None
+    rat_jar_path: str,
+    scan_root: str,
+    temp_dir: str,
+    excludes_file_path: str | None,
+    apply_extended_std: bool,
+    excludes_source: str,
 ) -> tuple[dict[str, Any] | None, str | None]:
     """Execute Apache RAT and process its output."""
     xml_output_path = os.path.join(temp_dir, _RAT_REPORT_FILENAME)
     log.info(f"XML output will be written to: {xml_output_path}")
 
     # Convert exclusion file path from temp_dir relative to scan_root relative
-    exclude_file: str | None = None
-    if exclude_file_path is not None:
-        abs_path = os.path.join(temp_dir, exclude_file_path)
+    excludes_file: str | None = None
+    if excludes_file_path is not None:
+        abs_path = os.path.join(temp_dir, excludes_file_path)
         if not (os.path.exists(abs_path) and os.path.isfile(abs_path)):
             log.error(f"Exclusion file not found or not a regular file: {abs_path}")
             return {
                 "valid": False,
-                "message": f"Exclusion file is not a regular file: {exclude_file_path}",
+                "message": f"Exclusion file is not a regular file: {excludes_file_path}",
                 "total_files": 0,
                 "approved_licenses": 0,
                 "unapproved_licenses": 0,
@@ -377,10 +425,11 @@ def _check_core_logic_execute_rat(
                 "unapproved_files": [],
                 "unknown_license_files": [],
                 "errors": [f"Expected exclusion file but found: {abs_path}"],
+                "excludes_source": excludes_source,
             }, None
-        exclude_file = os.path.relpath(abs_path, scan_root)
-        log.info(f"Using exclusion file: {exclude_file}")
-    command = _build_rat_command(rat_jar_path, xml_output_path, exclude_file)
+        excludes_file = os.path.relpath(abs_path, scan_root)
+        log.info(f"Using exclusion file: {excludes_file} (source: {excludes_source})")
+    command = _build_rat_command(rat_jar_path, xml_output_path, excludes_file, apply_extended_std)
     log.info(f"Running Apache RAT: {' '.join(command)}")
 
     # Change working directory to scan_root when running the process
@@ -424,6 +473,7 @@ def _check_core_logic_execute_rat(
                     f"STDOUT: {process.stdout}",
                     f"STDERR: {process.stderr}",
                 ],
+                "excludes_source": excludes_source,
             }
             return error_dict, None
 
@@ -442,6 +492,7 @@ def _check_core_logic_execute_rat(
             "unapproved_files": [],
             "unknown_license_files": [],
             "errors": [f"Timeout: {e}"],
+            "excludes_source": excludes_source,
         }, None
     except Exception as e:
         # Change back to the original directory before raising
@@ -457,6 +508,7 @@ def _check_core_logic_execute_rat(
             "unapproved_files": [],
             "unknown_license_files": [],
             "errors": [f"Process error: {e}"],
+            "excludes_source": excludes_source,
         }, None
 
     # Change back to the original directory
@@ -479,6 +531,7 @@ def _check_core_logic_execute_rat(
             "unapproved_files": [],
             "unknown_license_files": [],
             "errors": [f"Missing output file: {xml_output_path}"],
+            "excludes_source": excludes_source,
         }, None
 
     # The XML was found correctly
@@ -529,6 +582,7 @@ def _check_core_logic_jar_exists(rat_jar_path: str) -> tuple[str, dict[str, Any]
                 "unapproved_files": [],
                 "unknown_license_files": [],
                 "errors": [f"Missing JAR: {rat_jar_path}"],
+                "excludes_source": "unknown",
             }
     else:
         log.info(f"Found Apache RAT JAR at: {rat_jar_path}")
@@ -662,6 +716,7 @@ def _check_java_installed() -> dict[str, Any] | None:
             "unapproved_files": [],
             "unknown_license_files": [],
             "errors": [f"Java error: {e}"],
+            "excludes_source": "unknown",
         }
 
 
diff --git a/tests/unit/test_license_headers.py b/tests/unit/test_checks_license.py
similarity index 100%
rename from tests/unit/test_license_headers.py
rename to tests/unit/test_checks_license.py
diff --git a/tests/unit/test_checks_rat.py b/tests/unit/test_checks_rat.py
new file mode 100644
index 0000000..97f3440
--- /dev/null
+++ b/tests/unit/test_checks_rat.py
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pathlib
+
+import pytest
+
+import atr.tasks.checks.rat as rat
+
+TEST_ARCHIVE = pathlib.Path(__file__).parent.parent / "e2e" / "test_files" / "apache-test-0.2.tar.gz"
+
+
+@pytest.fixture
+def rat_available() -> tuple[bool, bool]:
+    # TODO: Make this work properly in CI
+    java_ok = rat._check_java_installed() is None
+    _, jar_error = rat._check_core_logic_jar_exists(rat._CONFIG.APACHE_RAT_JAR_PATH)
+    jar_ok = jar_error is None
+    return (java_ok, jar_ok)
+
+
+def _skip_if_unavailable(rat_available: tuple[bool, bool]) -> None:
+    java_ok, jar_ok = rat_available
+    if not java_ok:
+        pytest.skip("Java not available")
+    if not jar_ok:
+        pytest.skip("RAT JAR not available")
+
+
+def test_check_includes_excludes_source_none(rat_available: tuple[bool, bool]):
+    _skip_if_unavailable(rat_available)
+    result = rat._check_core_logic(str(TEST_ARCHIVE), [])
+    assert result["excludes_source"] == "none"
+
+
+def test_check_includes_excludes_source_policy(rat_available: tuple[bool, bool]):
+    _skip_if_unavailable(rat_available)
+    result = rat._check_core_logic(str(TEST_ARCHIVE), ["*.py"])
+    assert result["excludes_source"] == "policy"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(tooling-trusted-releases) 01/01: Use exclusions from release policies in RAT checks

Reply via email to