This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git

commit d8489a6c6f62a42943b0b45867946c43d914a37a
Author: Sean B. Palmer <[email protected]>
AuthorDate: Fri Mar 20 17:44:19 2026 +0000

    Add a classification type for documentation
---
 atr/analysis.py               |  1 +
 atr/classify.py               |  3 ++-
 atr/tasks/checks/license.py   | 11 +++++------
 atr/tasks/checks/rat.py       |  7 +++----
 atr/tasks/checks/targz.py     |  2 +-
 atr/tasks/checks/zipformat.py |  2 +-
 tests/unit/recorders.py       |  4 ++--
 tests/unit/test_classify.py   | 27 +++++++++++++++++++++++++++
 8 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/atr/analysis.py b/atr/analysis.py
index 2502e8db..8ee7d7d2 100755
--- a/atr/analysis.py
+++ b/atr/analysis.py
@@ -70,6 +70,7 @@ DISALLOWED_FILENAMES: Final[frozenset[str]] = frozenset(
         "id_ecdsa",
         "id_ed25519",
         "id_rsa",
+        "KEYS",
         "Thumbs.db",
     }
 )
diff --git a/atr/classify.py b/atr/classify.py
index 12b99a1c..06cba511 100644
--- a/atr/classify.py
+++ b/atr/classify.py
@@ -108,6 +108,7 @@ _TOKEN_SPLIT_RE: Final[re.Pattern[str]] = re.compile(r"[-_.]+")
 class FileType(enum.Enum):
     BINARY = "binary"
     DISALLOWED = "disallowed"
+    DOCS = "docs"
     METADATA = "metadata"
     SOURCE = "source"
 
@@ -167,7 +168,7 @@ def classify(
 def classify_from_counts(source_count: int, binary_count: int, docs_count: int) -> FileType:
     if (source_count == 0) and (binary_count == 0):
         if docs_count > 0:
-            return FileType.BINARY
+            return FileType.DOCS
         return FileType.SOURCE
     if source_count >= binary_count:
         return FileType.SOURCE
diff --git a/atr/tasks/checks/license.py b/atr/tasks/checks/license.py
index 79a45cda..baff7a83 100644
--- a/atr/tasks/checks/license.py
+++ b/atr/tasks/checks/license.py
@@ -137,8 +137,8 @@ async def files(args: checks.FunctionArguments) -> results.Results | None:
     if not (artifact_abs_path := await recorder.abs_path()):
         return None
 
-    is_binary = await recorder.primary_path_is_binary()
-    if not is_binary:
+    is_source = await recorder.primary_path_is_source()
+    if is_source:
         project = await recorder.project()
         if project.policy_license_check_mode == sql.LicenseCheckMode.RAT:
             return None
@@ -156,7 +156,7 @@ async def files(args: checks.FunctionArguments) -> results.Results | None:
     log.info(f"Checking license files for {artifact_abs_path} (rel: {args.primary_rel_path})")
 
     try:
-        for result in await asyncio.to_thread(_files_check_core_logic, archive_dir, is_podling, is_binary):
+        for result in await asyncio.to_thread(_files_check_core_logic, archive_dir, is_podling, not is_source):
             match result:
                 case ArtifactResult():
                     await _record_artifact(recorder, result)
@@ -178,8 +178,8 @@ async def headers(args: checks.FunctionArguments) -> results.Results | None:
     if not (artifact_abs_path := await recorder.abs_path()):
         return None
 
-    is_binary = await recorder.primary_path_is_binary()
-    if not is_binary:
+    is_source = await recorder.primary_path_is_source()
+    if is_source:
         project = await recorder.project()
         if project.policy_license_check_mode == sql.LicenseCheckMode.RAT:
             return None
@@ -198,7 +198,6 @@ async def headers(args: checks.FunctionArguments) -> results.Results | None:
 
     log.info(f"Checking license headers for {artifact_abs_path} (rel: {args.primary_rel_path})")
 
-    is_source = await recorder.primary_path_is_source()
     project = await recorder.project()
 
     ignore_lines: list[str] = []
diff --git a/atr/tasks/checks/rat.py b/atr/tasks/checks/rat.py
index d11d03e1..ca9648ba 100644
--- a/atr/tasks/checks/rat.py
+++ b/atr/tasks/checks/rat.py
@@ -80,8 +80,8 @@ async def check(args: checks.FunctionArguments) -> results.Results | None:
     recorder = await args.recorder(CHECK_VERSION)
     if not (artifact_abs_path := await recorder.abs_path()):
         return None
-    if await recorder.primary_path_is_binary():
-        log.info(f"Skipping RAT check for binary artifact {artifact_abs_path} (rel: {args.primary_rel_path})")
+    if not await recorder.primary_path_is_source():
+        log.info(f"Skipping RAT check for non-source artifact {artifact_abs_path} (rel: {args.primary_rel_path})")
         return None
 
     project = await recorder.project()
@@ -99,8 +99,7 @@ async def check(args: checks.FunctionArguments) -> results.Results | None:
 
     log.info(f"Checking RAT licenses for {artifact_abs_path} (rel: {args.primary_rel_path})")
 
-    is_source = await recorder.primary_path_is_source()
-    policy_excludes = project.policy_source_excludes_rat if is_source else []
+    policy_excludes = project.policy_source_excludes_rat
 
     try:
         await _check_core(args, recorder, archive_dir, policy_excludes)
diff --git a/atr/tasks/checks/targz.py b/atr/tasks/checks/targz.py
index c955f404..38924c29 100644
--- a/atr/tasks/checks/targz.py
+++ b/atr/tasks/checks/targz.py
@@ -78,7 +78,7 @@ async def structure(args: checks.FunctionArguments) -> results.Results | None:
     recorder = await args.recorder(CHECK_VERSION_STRUCTURE)
     if not (artifact_abs_path := await recorder.abs_path()):
         return None
-    if await recorder.primary_path_is_binary():
+    if not await recorder.primary_path_is_source():
         return None
 
     archive_dir = await checks.resolve_archive_dir(args)
diff --git a/atr/tasks/checks/zipformat.py b/atr/tasks/checks/zipformat.py
index a4b85839..e18adedf 100644
--- a/atr/tasks/checks/zipformat.py
+++ b/atr/tasks/checks/zipformat.py
@@ -41,7 +41,7 @@ async def structure(args: checks.FunctionArguments) -> results.Results | None:
     recorder = await args.recorder(CHECK_VERSION_STRUCTURE)
     if not (artifact_abs_path := await recorder.abs_path()):
         return None
-    if await recorder.primary_path_is_binary():
+    if not await recorder.primary_path_is_source():
         return None
 
     archive_dir = await checks.resolve_archive_dir(args)
diff --git a/tests/unit/recorders.py b/tests/unit/recorders.py
index 0abfe405..aabb0b9d 100644
--- a/tests/unit/recorders.py
+++ b/tests/unit/recorders.py
@@ -44,8 +44,8 @@ class RecorderStub(checks.Recorder):
     async def abs_path(self, rel_path: str | None = None) -> pathlib.Path | None:
         return self._path if (rel_path is None) else self._path / rel_path
 
-    async def primary_path_is_binary(self) -> bool:
-        return False
+    async def primary_path_is_source(self) -> bool:
+        return True
 
     async def _add(
         self,
diff --git a/tests/unit/test_classify.py b/tests/unit/test_classify.py
index 3ec7946a..f4bd563f 100644
--- a/tests/unit/test_classify.py
+++ b/tests/unit/test_classify.py
@@ -58,11 +58,38 @@ def test_binary_stem_heuristic():
     assert classify.classify(path) == classify.FileType.BINARY
 
 
+def test_counts_docs_only():
+    assert classify.classify_from_counts(0, 0, 1) == classify.FileType.DOCS
+
+
+def test_counts_docs_with_binary():
+    assert classify.classify_from_counts(0, 1, 1) == classify.FileType.BINARY
+
+
+def test_counts_docs_with_source():
+    assert classify.classify_from_counts(1, 0, 1) == classify.FileType.SOURCE
+
+
 def test_disallowed_files_detected():
     path = pathlib.Path(".DS_Store")
     assert classify.classify(path) == classify.FileType.DISALLOWED
 
 
+def test_docs_stem_heuristic():
+    path = pathlib.Path("apache-widget-1.0-docs.tar.gz")
+    assert classify.classify(path) == classify.FileType.DOCS
+
+
+def test_docs_stem_heuristic_javadoc():
+    path = pathlib.Path("apache-widget-1.0-javadoc.zip")
+    assert classify.classify(path) == classify.FileType.DOCS
+
+
+def test_docs_stem_heuristic_site():
+    path = pathlib.Path("apache-widget-1.0-site.tar.gz")
+    assert classify.classify(path) == classify.FileType.DOCS
+
+
 def test_jar_defaults_to_binary():
     path = pathlib.Path("apache-widget-1.0.jar")
     assert classify.classify(path) == classify.FileType.BINARY


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to