This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/sbp by this push:
     new 209816de Add a module for file classification and use it
209816de is described below

commit 209816de18ed80d7153417d3a17926d9abf0979f
Author: Sean B. Palmer <[email protected]>
AuthorDate: Fri Feb 13 17:31:28 2026 +0000

    Add a module for file classification and use it
---
 atr/classify.py                              | 49 ++++++++++++++++++++++++++++
 atr/storage/readers/releases.py              | 23 ++++---------
 atr/storage/types.py                         |  5 ++-
 atr/tasks/checks/__init__.py                 |  8 ++---
 atr/templates/check-selected-path-table.html | 29 +++-------------
 atr/templates/check-selected.html            |  5 ---
 atr/util.py                                  |  4 ++-
 7 files changed, 67 insertions(+), 56 deletions(-)

diff --git a/atr/classify.py b/atr/classify.py
new file mode 100644
index 00000000..f909dc9e
--- /dev/null
+++ b/atr/classify.py
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import enum
+import pathlib
+import re
+from collections.abc import Callable
+
+import atr.analysis as analysis
+
+
+class FileType(enum.Enum):
+    BINARY = "binary"
+    DISALLOWED = "disallowed"
+    METADATA = "metadata"
+    SOURCE = "source"
+
+
+def classify(
+    path: pathlib.Path,
+    base_path: pathlib.Path | None = None,
+    source_matcher: Callable[[str], bool] | None = None,
+) -> FileType:
+    if (path.name in analysis.DISALLOWED_FILENAMES) or (path.suffix in analysis.DISALLOWED_SUFFIXES):
+        return FileType.DISALLOWED
+
+    search = re.search(analysis.extension_pattern(), str(path))
+    if search and search.group("metadata"):
+        return FileType.METADATA
+
+    if search and search.group("artifact") and (source_matcher is not None) and (base_path is not None):
+        if source_matcher(str(base_path / path)):
+            return FileType.SOURCE
+
+    return FileType.BINARY
diff --git a/atr/storage/readers/releases.py b/atr/storage/readers/releases.py
index f45539f2..e57a9786 100644
--- a/atr/storage/readers/releases.py
+++ b/atr/storage/readers/releases.py
@@ -20,9 +20,8 @@ from __future__ import annotations
 
 import dataclasses
 import pathlib
-import re
 
-import atr.analysis as analysis
+import atr.classify as classify
 import atr.db as db
 import atr.models.sql as sql
 import atr.storage as storage
@@ -60,21 +59,13 @@ class GeneralPublic:
         if latest_revision_number is None:
             return None
         await self.__successes_errors_warnings(release, latest_revision_number, info)
-        for path in paths:
-            # Get artifacts and metadata
-            search = re.search(analysis.extension_pattern(), str(path))
-            if search:
-                if search.group("artifact"):
-                    info.artifacts.add(path)
-                elif search.group("metadata"):
-                    info.metadata.add(path)
+        base_path = util.release_directory(release)
+        source_matcher = None
         source_artifact_paths = release.project.policy_source_artifact_paths
-        if source_artifact_paths and info.artifacts:
-            base_path = util.release_directory(release)
-            source_matcher = util.create_path_matcher(source_artifact_paths, base_path / ".ignore", base_path)
-            for path in info.artifacts:
-                if source_matcher(str(base_path / path)):
-                    info.sources.add(path)
+        if source_artifact_paths:
+            source_matcher = util.create_path_matcher(source_artifact_paths, None, base_path)
+        for path in paths:
+            info.file_types[path] = classify.classify(path, base_path=base_path, source_matcher=source_matcher)
         self.__compute_checker_stats(info, paths)
         return info
 
diff --git a/atr/storage/types.py b/atr/storage/types.py
index 7b957a3b..3cd74f6b 100644
--- a/atr/storage/types.py
+++ b/atr/storage/types.py
@@ -20,6 +20,7 @@ import enum
 import pathlib
 from collections.abc import Callable
 
+import atr.classify as classify
 import atr.models.schema as schema
 import atr.models.sql as sql
 import atr.storage.outcome as outcome
@@ -63,13 +64,11 @@ class LinkedCommittee:
 
 
 class PathInfo(schema.Strict):
-    artifacts: set[pathlib.Path] = schema.factory(set)
     checker_stats: list[CheckerStats] = schema.factory(list)
     errors: dict[pathlib.Path, list[sql.CheckResult]] = schema.factory(dict)
+    file_types: dict[pathlib.Path, classify.FileType] = schema.factory(dict)
     ignored_errors: list[sql.CheckResult] = schema.factory(list)
     ignored_warnings: list[sql.CheckResult] = schema.factory(list)
-    metadata: set[pathlib.Path] = schema.factory(set)
-    sources: set[pathlib.Path] = schema.factory(set)
     successes: dict[pathlib.Path, list[sql.CheckResult]] = schema.factory(dict)
     warnings: dict[pathlib.Path, list[sql.CheckResult]] = schema.factory(dict)
 
diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py
index 08d2c4c6..1b78f68e 100644
--- a/atr/tasks/checks/__init__.py
+++ b/atr/tasks/checks/__init__.py
@@ -182,9 +182,7 @@ class Recorder:
         project = await self.project()
         if not project.policy_binary_artifact_paths:
             return False
-        matches = util.create_path_matcher(
-            project.policy_binary_artifact_paths, self.abs_path_base() / ".ignore", self.abs_path_base()
-        )
+        matches = util.create_path_matcher(project.policy_binary_artifact_paths, None, self.abs_path_base())
         abs_path = await self.abs_path()
         return matches(str(abs_path))
 
@@ -194,9 +192,7 @@ class Recorder:
         project = await self.project()
         if not project.policy_source_artifact_paths:
             return False
-        matches = util.create_path_matcher(
-            project.policy_source_artifact_paths, self.abs_path_base() / ".ignore", self.abs_path_base()
-        )
+        matches = util.create_path_matcher(project.policy_source_artifact_paths, None, self.abs_path_base())
         abs_path = await self.abs_path()
         return matches(str(abs_path))
 
diff --git a/atr/templates/check-selected-path-table.html b/atr/templates/check-selected-path-table.html
index 3b4f6096..fd960e0f 100644
--- a/atr/templates/check-selected-path-table.html
+++ b/atr/templates/check-selected-path-table.html
@@ -28,33 +28,12 @@
         {% endif %}
 
         <tr class="{{ row_bg_class }}">
-          {#
-          <td class="text-center px-1 py-2 page-icon-cell">
-            {% if info and (path in info.artifacts) %}
-              <i class="bi bi-archive {{ icon_class }}"
-                 title="Artifact"
-                 aria-label="Artifact"></i>
-            {% elif info and (path in info.metadata) %}
-              <i class="bi bi-file-earmark-text {{ icon_class }}"
-                 title="Metadata"
-                 aria-label="Metadata"></i>
-            {% else %}
-              <i class="bi bi-file-earmark {{ icon_class }}"
-                 title="File"
-                 aria-label="File"></i>
-            {% endif %}
-          </td>
-          #}
           <td class="text-center px-0 py-2 atr-sans {{ icon_class }}">
-            {% if info and (path in info.sources) %}
-              {% set file_type = "source" %}
-            {% elif info and (path in info.metadata) %}
-              {% set file_type = "metadata" %}
-            {% else %}
-              {% set file_type = "binary" %}
-            {% endif %}
+            {% set file_type = info.file_types[path].value if (info and (path in info.file_types)) else None %}
 
-            {% if file_type == "source" %}
+            {% if file_type == "disallowed" %}
+              <span title="Disallowed file">Ⓧ</span>
+            {% elif file_type == "source" %}
               <span title="Source artifact">Ⓢ</span>
             {% elif file_type == "metadata" %}
               <span title="Metadata file">Ⓜ</span>
diff --git a/atr/templates/check-selected.html b/atr/templates/check-selected.html
index 0a92433c..3328b666 100644
--- a/atr/templates/check-selected.html
+++ b/atr/templates/check-selected.html
@@ -11,11 +11,6 @@
       background-color: #eeeeee;
     }
 
-    .page-icon-cell {
-      width: 2em;
-      text-align: center;
-    }
-
     table tr {
       border-bottom: none;
     }
diff --git a/atr/util.py b/atr/util.py
index 2428a94c..22d25447 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -324,7 +324,9 @@ async def create_hard_link_clone(
     await _clone_recursive(source_dir, dest_dir)
 
 
-def create_path_matcher(lines: Iterable[str], full_path: pathlib.Path, base_dir: pathlib.Path) -> Callable[[str], bool]:
+def create_path_matcher(
+    lines: Iterable[str], full_path: pathlib.Path | None, base_dir: pathlib.Path
+) -> Callable[[str], bool]:
     rules = []
     negation = False
     for line_no, line in enumerate(lines, start=1):


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to