This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/main by this push:
new f7f0088 Use exclusions from release policies in RAT checks
f7f0088 is described below
commit f7f00887a7162cf05664afc86e25eddd15fbb8f2
Author: Sean B. Palmer <[email protected]>
AuthorDate: Fri Jan 9 20:06:37 2026 +0000
Use exclusions from release policies in RAT checks
---
atr/archives.py | 2 +-
atr/tasks/checks/rat.py | 95 +++++++++++++++++-----
...t_license_headers.py => test_checks_license.py} | 0
tests/unit/test_checks_rat.py | 44 ++++++++++
4 files changed, 120 insertions(+), 21 deletions(-)
diff --git a/atr/archives.py b/atr/archives.py
index 2560c95..0f87d88 100644
--- a/atr/archives.py
+++ b/atr/archives.py
@@ -119,7 +119,7 @@ def _archive_extract_member( # noqa: C901
if _safe_path(extract_dir, member.name) is None:
log.warning(f"Skipping potentially unsafe path: {member.name}")
return 0, extracted_paths
- tf.extract(member, extract_dir, numeric_owner=True)
+ tf.extract(member, extract_dir, numeric_owner=True, filter="fully_trusted")
elif member.isreg():
extracted_size = _archive_extract_safe_process_file(
diff --git a/atr/tasks/checks/rat.py b/atr/tasks/checks/rat.py
index 6988367..bbe4bce 100644
--- a/atr/tasks/checks/rat.py
+++ b/atr/tasks/checks/rat.py
@@ -47,6 +47,9 @@ _JAVA_MEMORY_ARGS: Final[list[str]] = []
# Generated file patterns, always excluded
_GENERATED_FILE_PATTERNS: Final[list[str]] = [f"**/*{s}" for s in
constants.GENERATED_FILE_SUFFIXES]
+# The name of the temp file for excludes defined in release policies
+_POLICY_EXCLUDES_FILENAME: Final[str] = ".atr-rat-excludes"
+
# The name of the file that contains the exclusions for the specified archive
_RAT_EXCLUDES_FILENAME: Final[str] = ".rat-excludes"
@@ -86,8 +89,11 @@ async def check(args: checks.FunctionArguments) ->
results.Results | None:
log.info(f"Checking RAT licenses for {artifact_abs_path} (rel:
{args.primary_rel_path})")
+ is_source = await recorder.primary_path_is_source()
+ policy_excludes = project.policy_source_excludes_rat if is_source else []
+
try:
- await _check_core(args, recorder, artifact_abs_path)
+ await _check_core(args, recorder, artifact_abs_path, policy_excludes)
except Exception as e:
# TODO: Or bubble for task failure?
await recorder.failure("Error running Apache RAT check", {"error":
str(e)})
@@ -98,7 +104,8 @@ async def check(args: checks.FunctionArguments) ->
results.Results | None:
def _build_rat_command(
rat_jar_path: str,
xml_output_path: str,
- exclude_file: str | None,
+ excludes_file: str | None,
+ apply_extended_std: bool,
) -> list[str]:
"""Build the RAT command with appropriate exclusions."""
command = [
@@ -121,7 +128,7 @@ def _build_rat_command(
for std in _STD_EXCLUSIONS_ALWAYS:
command.extend(["--input-exclude-std", std])
- if exclude_file is None:
+ if apply_extended_std:
for std in _STD_EXCLUSIONS_EXTENDED:
command.extend(["--input-exclude-std", std])
@@ -129,10 +136,11 @@ def _build_rat_command(
command.extend(["--input-exclude", pattern])
command.extend(["--input-exclude", _RAT_REPORT_FILENAME])
+ command.extend(["--input-exclude", _POLICY_EXCLUDES_FILENAME])
- if exclude_file is not None:
+ if excludes_file is not None:
command.extend(["--input-exclude", _RAT_EXCLUDES_FILENAME])
- command.extend(["--input-exclude-file", exclude_file])
+ command.extend(["--input-exclude-file", excludes_file])
command.extend(["--", "."])
@@ -140,11 +148,15 @@ def _build_rat_command(
async def _check_core(
- args: checks.FunctionArguments, recorder: checks.Recorder,
artifact_abs_path: pathlib.Path
+ args: checks.FunctionArguments,
+ recorder: checks.Recorder,
+ artifact_abs_path: pathlib.Path,
+ policy_excludes: list[str],
) -> None:
result_data = await asyncio.to_thread(
_check_core_logic,
artifact_path=str(artifact_abs_path),
+ policy_excludes=policy_excludes,
rat_jar_path=args.extra_args.get("rat_jar_path",
_CONFIG.APACHE_RAT_JAR_PATH),
max_extract_size=args.extra_args.get("max_extract_size",
_CONFIG.MAX_EXTRACT_SIZE),
chunk_size=args.extra_args.get("chunk_size",
_CONFIG.EXTRACT_CHUNK_SIZE),
@@ -187,6 +199,7 @@ async def _check_core(
def _check_core_logic( # noqa: C901
artifact_path: str,
+ policy_excludes: list[str],
rat_jar_path: str = _CONFIG.APACHE_RAT_JAR_PATH,
max_extract_size: int = _CONFIG.MAX_EXTRACT_SIZE,
chunk_size: int = _CONFIG.EXTRACT_CHUNK_SIZE,
@@ -256,14 +269,35 @@ def _check_core_logic( # noqa: C901
"unapproved_files": [],
"unknown_license_files": [],
"errors": [f"Found {len(exclude_file_paths)}
{_RAT_EXCLUDES_FILENAME} files"],
+ "excludes_source": "unknown",
}
# Narrow to single path after validation
- exclude_file_path: str | None = exclude_file_paths[0] if
exclude_file_paths else None
+ archive_excludes_path: str | None = exclude_file_paths[0] if exclude_file_paths else None
+
+ # Determine excludes_source and effective excludes file
+ excludes_source: str
+ effective_excludes_path: str | None
+
+ if archive_excludes_path is not None:
+ excludes_source = "archive"
+ effective_excludes_path = archive_excludes_path
+ log.info(f"Using archive {_RAT_EXCLUDES_FILENAME}: {archive_excludes_path}")
+ elif policy_excludes:
+ excludes_source = "policy"
+ policy_excludes_file = os.path.join(temp_dir, _POLICY_EXCLUDES_FILENAME)
+ with open(policy_excludes_file, "w") as f:
+ f.write("\n".join(policy_excludes))
+ effective_excludes_path = os.path.relpath(policy_excludes_file, temp_dir)
+ log.info(f"Using policy excludes written to: {policy_excludes_file}")
+ else:
+ excludes_source = "none"
+ effective_excludes_path = None
+ log.info("No excludes: using defaults only")
- # Determine scan root
- if exclude_file_path is not None:
- scan_root = os.path.dirname(os.path.join(temp_dir,
exclude_file_path))
+ # Determine scan root based on archive .rat-excludes location
+ if archive_excludes_path is not None:
+ scan_root = os.path.dirname(os.path.join(temp_dir,
archive_excludes_path))
# Verify that scan_root is inside temp_dir
abs_scan_root = os.path.abspath(scan_root)
@@ -281,6 +315,7 @@ def _check_core_logic( # noqa: C901
"unapproved_files": [],
"unknown_license_files": [],
"errors": ["Exclusion file path escapes extraction
directory"],
+ "excludes_source": excludes_source,
}
log.info(f"Using {_RAT_EXCLUDES_FILENAME} directory as scan
root: {scan_root}")
@@ -298,16 +333,21 @@ def _check_core_logic( # noqa: C901
"unapproved_files": [],
"unknown_license_files": [],
"errors": [f"{untracked_count} file(s) outside
{_RAT_EXCLUDES_FILENAME} directory"],
+ "excludes_source": excludes_source,
}
else:
scan_root = temp_dir
- log.info(f"No {_RAT_EXCLUDES_FILENAME} found, using temp_dir
as scan root: {scan_root}")
+ log.info(f"No archive {_RAT_EXCLUDES_FILENAME} found, using
temp_dir as scan root: {scan_root}")
# Execute RAT and get results or error
+ # Extended std exclusions apply when there's no archive .rat-excludes
+ apply_extended_std = excludes_source != "archive"
error_result, xml_output_path = _check_core_logic_execute_rat(
- rat_jar_path, scan_root, temp_dir, exclude_file_path
+ rat_jar_path, scan_root, temp_dir, effective_excludes_path, apply_extended_std, excludes_source
)
if error_result:
+ error_result["excludes_source"] = excludes_source
+ error_result["extended_std_applied"] = apply_extended_std
return error_result
# Parse the XML output
@@ -335,6 +375,8 @@ def _check_core_logic( # noqa: C901
os.path.normpath(file["name"]),
)
+ results["excludes_source"] = excludes_source
+ results["extended_std_applied"] = apply_extended_std
return results
except Exception as e:
@@ -351,25 +393,31 @@ def _check_core_logic( # noqa: C901
"unapproved_files": [],
"unknown_license_files": [],
"errors": [str(e), traceback.format_exc()],
+ "excludes_source": "unknown",
}
def _check_core_logic_execute_rat(
- rat_jar_path: str, scan_root: str, temp_dir: str, exclude_file_path: str |
None
+ rat_jar_path: str,
+ scan_root: str,
+ temp_dir: str,
+ excludes_file_path: str | None,
+ apply_extended_std: bool,
+ excludes_source: str,
) -> tuple[dict[str, Any] | None, str | None]:
"""Execute Apache RAT and process its output."""
xml_output_path = os.path.join(temp_dir, _RAT_REPORT_FILENAME)
log.info(f"XML output will be written to: {xml_output_path}")
# Convert exclusion file path from temp_dir relative to scan_root relative
- exclude_file: str | None = None
- if exclude_file_path is not None:
- abs_path = os.path.join(temp_dir, exclude_file_path)
+ excludes_file: str | None = None
+ if excludes_file_path is not None:
+ abs_path = os.path.join(temp_dir, excludes_file_path)
if not (os.path.exists(abs_path) and os.path.isfile(abs_path)):
log.error(f"Exclusion file not found or not a regular file:
{abs_path}")
return {
"valid": False,
- "message": f"Exclusion file is not a regular file:
{exclude_file_path}",
+ "message": f"Exclusion file is not a regular file:
{excludes_file_path}",
"total_files": 0,
"approved_licenses": 0,
"unapproved_licenses": 0,
@@ -377,10 +425,11 @@ def _check_core_logic_execute_rat(
"unapproved_files": [],
"unknown_license_files": [],
"errors": [f"Expected exclusion file but found: {abs_path}"],
+ "excludes_source": excludes_source,
}, None
- exclude_file = os.path.relpath(abs_path, scan_root)
- log.info(f"Using exclusion file: {exclude_file}")
- command = _build_rat_command(rat_jar_path, xml_output_path, exclude_file)
+ excludes_file = os.path.relpath(abs_path, scan_root)
+ log.info(f"Using exclusion file: {excludes_file} (source:
{excludes_source})")
+ command = _build_rat_command(rat_jar_path, xml_output_path, excludes_file,
apply_extended_std)
log.info(f"Running Apache RAT: {' '.join(command)}")
# Change working directory to scan_root when running the process
@@ -424,6 +473,7 @@ def _check_core_logic_execute_rat(
f"STDOUT: {process.stdout}",
f"STDERR: {process.stderr}",
],
+ "excludes_source": excludes_source,
}
return error_dict, None
@@ -442,6 +492,7 @@ def _check_core_logic_execute_rat(
"unapproved_files": [],
"unknown_license_files": [],
"errors": [f"Timeout: {e}"],
+ "excludes_source": excludes_source,
}, None
except Exception as e:
# Change back to the original directory before raising
@@ -457,6 +508,7 @@ def _check_core_logic_execute_rat(
"unapproved_files": [],
"unknown_license_files": [],
"errors": [f"Process error: {e}"],
+ "excludes_source": excludes_source,
}, None
# Change back to the original directory
@@ -479,6 +531,7 @@ def _check_core_logic_execute_rat(
"unapproved_files": [],
"unknown_license_files": [],
"errors": [f"Missing output file: {xml_output_path}"],
+ "excludes_source": excludes_source,
}, None
# The XML was found correctly
@@ -529,6 +582,7 @@ def _check_core_logic_jar_exists(rat_jar_path: str) ->
tuple[str, dict[str, Any]
"unapproved_files": [],
"unknown_license_files": [],
"errors": [f"Missing JAR: {rat_jar_path}"],
+ "excludes_source": "unknown",
}
else:
log.info(f"Found Apache RAT JAR at: {rat_jar_path}")
@@ -662,6 +716,7 @@ def _check_java_installed() -> dict[str, Any] | None:
"unapproved_files": [],
"unknown_license_files": [],
"errors": [f"Java error: {e}"],
+ "excludes_source": "unknown",
}
diff --git a/tests/unit/test_license_headers.py b/tests/unit/test_checks_license.py
similarity index 100%
rename from tests/unit/test_license_headers.py
rename to tests/unit/test_checks_license.py
diff --git a/tests/unit/test_checks_rat.py b/tests/unit/test_checks_rat.py
new file mode 100644
index 0000000..a687bcb
--- /dev/null
+++ b/tests/unit/test_checks_rat.py
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pathlib
+
+import pytest
+
+import atr.tasks.checks.rat as rat
+
+TEST_ARCHIVE = pathlib.Path(__file__).parent.parent / "e2e" / "test_files" / "apache-test-0.2.tar.gz"
+
+
+@pytest.fixture
+def rat_available() -> bool:
+ check = rat._check_java_installed()
+ return check is None
+
+
+def test_check_includes_excludes_source_none(rat_available: bool):
+ if not rat_available:
+ pytest.skip("Java/RAT not available")
+ result = rat._check_core_logic(str(TEST_ARCHIVE), [])
+ assert result["excludes_source"] == "none"
+
+
+def test_check_includes_excludes_source_policy(rat_available: bool):
+ if not rat_available:
+ pytest.skip("Java/RAT not available")
+ result = rat._check_core_logic(str(TEST_ARCHIVE), ["*.py"])
+ assert result["excludes_source"] == "policy"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]