This is an automated email from the ASF dual-hosted git repository. sbp pushed a commit to branch sbp in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
commit d8489a6c6f62a42943b0b45867946c43d914a37a Author: Sean B. Palmer <[email protected]> AuthorDate: Fri Mar 20 17:44:19 2026 +0000 Add a classification type for documentation --- atr/analysis.py | 1 + atr/classify.py | 3 ++- atr/tasks/checks/license.py | 11 +++++------ atr/tasks/checks/rat.py | 7 +++---- atr/tasks/checks/targz.py | 2 +- atr/tasks/checks/zipformat.py | 2 +- tests/unit/recorders.py | 4 ++-- tests/unit/test_classify.py | 27 +++++++++++++++++++++++++++ 8 files changed, 42 insertions(+), 15 deletions(-) diff --git a/atr/analysis.py b/atr/analysis.py index 2502e8db..8ee7d7d2 100755 --- a/atr/analysis.py +++ b/atr/analysis.py @@ -70,6 +70,7 @@ DISALLOWED_FILENAMES: Final[frozenset[str]] = frozenset( "id_ecdsa", "id_ed25519", "id_rsa", + "KEYS", "Thumbs.db", } ) diff --git a/atr/classify.py b/atr/classify.py index 12b99a1c..06cba511 100644 --- a/atr/classify.py +++ b/atr/classify.py @@ -108,6 +108,7 @@ _TOKEN_SPLIT_RE: Final[re.Pattern[str]] = re.compile(r"[-_.]+") class FileType(enum.Enum): BINARY = "binary" DISALLOWED = "disallowed" + DOCS = "docs" METADATA = "metadata" SOURCE = "source" @@ -167,7 +168,7 @@ def classify( def classify_from_counts(source_count: int, binary_count: int, docs_count: int) -> FileType: if (source_count == 0) and (binary_count == 0): if docs_count > 0: - return FileType.BINARY + return FileType.DOCS return FileType.SOURCE if source_count >= binary_count: return FileType.SOURCE diff --git a/atr/tasks/checks/license.py b/atr/tasks/checks/license.py index 79a45cda..baff7a83 100644 --- a/atr/tasks/checks/license.py +++ b/atr/tasks/checks/license.py @@ -137,8 +137,8 @@ async def files(args: checks.FunctionArguments) -> results.Results | None: if not (artifact_abs_path := await recorder.abs_path()): return None - is_binary = await recorder.primary_path_is_binary() - if not is_binary: + is_source = await recorder.primary_path_is_source() + if is_source: project = await recorder.project() if project.policy_license_check_mode == sql.LicenseCheckMode.RAT: return None @@ -156,7 +156,7 @@ async def files(args: checks.FunctionArguments) -> results.Results | None: log.info(f"Checking license files for {artifact_abs_path} (rel: {args.primary_rel_path})") try: - for result in await asyncio.to_thread(_files_check_core_logic, archive_dir, is_podling, is_binary): + for result in await asyncio.to_thread(_files_check_core_logic, archive_dir, is_podling, not is_source): match result: case ArtifactResult(): await _record_artifact(recorder, result) @@ -178,8 +178,8 @@ async def headers(args: checks.FunctionArguments) -> results.Results | None: if not (artifact_abs_path := await recorder.abs_path()): return None - is_binary = await recorder.primary_path_is_binary() - if not is_binary: + is_source = await recorder.primary_path_is_source() + if is_source: project = await recorder.project() if project.policy_license_check_mode == sql.LicenseCheckMode.RAT: return None @@ -198,7 +198,6 @@ async def headers(args: checks.FunctionArguments) -> results.Results | None: log.info(f"Checking license headers for {artifact_abs_path} (rel: {args.primary_rel_path})") - is_source = await recorder.primary_path_is_source() project = await recorder.project() ignore_lines: list[str] = [] diff --git a/atr/tasks/checks/rat.py b/atr/tasks/checks/rat.py index d11d03e1..ca9648ba 100644 --- a/atr/tasks/checks/rat.py +++ b/atr/tasks/checks/rat.py @@ -80,8 +80,8 @@ async def check(args: checks.FunctionArguments) -> results.Results | None: recorder = await args.recorder(CHECK_VERSION) if not (artifact_abs_path := await recorder.abs_path()): return None - if await recorder.primary_path_is_binary(): - log.info(f"Skipping RAT check for binary artifact {artifact_abs_path} (rel: {args.primary_rel_path})") + if not await recorder.primary_path_is_source(): + log.info(f"Skipping RAT check for non-source artifact {artifact_abs_path} (rel: {args.primary_rel_path})") return None project = await recorder.project() @@ -99,8 +99,7 @@ async def check(args: checks.FunctionArguments) -> results.Results | None: log.info(f"Checking RAT licenses for {artifact_abs_path} (rel: {args.primary_rel_path})") - is_source = await recorder.primary_path_is_source() - policy_excludes = project.policy_source_excludes_rat if is_source else [] + policy_excludes = project.policy_source_excludes_rat try: await _check_core(args, recorder, archive_dir, policy_excludes) diff --git a/atr/tasks/checks/targz.py b/atr/tasks/checks/targz.py index c955f404..38924c29 100644 --- a/atr/tasks/checks/targz.py +++ b/atr/tasks/checks/targz.py @@ -78,7 +78,7 @@ async def structure(args: checks.FunctionArguments) -> results.Results | None: recorder = await args.recorder(CHECK_VERSION_STRUCTURE) if not (artifact_abs_path := await recorder.abs_path()): return None - if await recorder.primary_path_is_binary(): + if not await recorder.primary_path_is_source(): return None archive_dir = await checks.resolve_archive_dir(args) diff --git a/atr/tasks/checks/zipformat.py b/atr/tasks/checks/zipformat.py index a4b85839..e18adedf 100644 --- a/atr/tasks/checks/zipformat.py +++ b/atr/tasks/checks/zipformat.py @@ -41,7 +41,7 @@ async def structure(args: checks.FunctionArguments) -> results.Results | None: recorder = await args.recorder(CHECK_VERSION_STRUCTURE) if not (artifact_abs_path := await recorder.abs_path()): return None - if await recorder.primary_path_is_binary(): + if not await recorder.primary_path_is_source(): return None archive_dir = await checks.resolve_archive_dir(args) diff --git a/tests/unit/recorders.py b/tests/unit/recorders.py index 0abfe405..aabb0b9d 100644 --- a/tests/unit/recorders.py +++ b/tests/unit/recorders.py @@ -44,8 +44,8 @@ class RecorderStub(checks.Recorder): async def abs_path(self, rel_path: str | None = None) -> pathlib.Path | None: return self._path if (rel_path is None) else self._path / rel_path - async def primary_path_is_binary(self) -> bool: - return False + async def primary_path_is_source(self) -> bool: + return True async def _add( self, diff --git a/tests/unit/test_classify.py b/tests/unit/test_classify.py index 3ec7946a..f4bd563f 100644 --- a/tests/unit/test_classify.py +++ b/tests/unit/test_classify.py @@ -58,11 +58,38 @@ def test_binary_stem_heuristic(): assert classify.classify(path) == classify.FileType.BINARY +def test_counts_docs_only(): + assert classify.classify_from_counts(0, 0, 1) == classify.FileType.DOCS + + +def test_counts_docs_with_binary(): + assert classify.classify_from_counts(0, 1, 1) == classify.FileType.BINARY + + +def test_counts_docs_with_source(): + assert classify.classify_from_counts(1, 0, 1) == classify.FileType.SOURCE + + def test_disallowed_files_detected(): path = pathlib.Path(".DS_Store") assert classify.classify(path) == classify.FileType.DISALLOWED +def test_docs_stem_heuristic(): + path = pathlib.Path("apache-widget-1.0-docs.tar.gz") + assert classify.classify(path) == classify.FileType.DOCS + + +def test_docs_stem_heuristic_javadoc(): + path = pathlib.Path("apache-widget-1.0-javadoc.zip") + assert classify.classify(path) == classify.FileType.DOCS + + +def test_docs_stem_heuristic_site(): + path = pathlib.Path("apache-widget-1.0-site.tar.gz") + assert classify.classify(path) == classify.FileType.DOCS + + def test_jar_defaults_to_binary(): path = pathlib.Path("apache-widget-1.0.jar") assert classify.classify(path) == classify.FileType.BINARY --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
