This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/sbp by this push:
new 209816de Add a module for file classification and use it
209816de is described below
commit 209816de18ed80d7153417d3a17926d9abf0979f
Author: Sean B. Palmer <[email protected]>
AuthorDate: Fri Feb 13 17:31:28 2026 +0000
Add a module for file classification and use it
---
atr/classify.py | 49 ++++++++++++++++++++++++++++
atr/storage/readers/releases.py | 23 ++++---------
atr/storage/types.py | 5 ++-
atr/tasks/checks/__init__.py | 8 ++---
atr/templates/check-selected-path-table.html | 29 +++-------------
atr/templates/check-selected.html | 5 ---
atr/util.py | 4 ++-
7 files changed, 67 insertions(+), 56 deletions(-)
diff --git a/atr/classify.py b/atr/classify.py
new file mode 100644
index 00000000..f909dc9e
--- /dev/null
+++ b/atr/classify.py
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import enum
+import pathlib
+import re
+from collections.abc import Callable
+
+import atr.analysis as analysis
+
+
+class FileType(enum.Enum):
+ BINARY = "binary"
+ DISALLOWED = "disallowed"
+ METADATA = "metadata"
+ SOURCE = "source"
+
+
+def classify(
+ path: pathlib.Path,
+ base_path: pathlib.Path | None = None,
+ source_matcher: Callable[[str], bool] | None = None,
+) -> FileType:
+ if (path.name in analysis.DISALLOWED_FILENAMES) or (path.suffix in analysis.DISALLOWED_SUFFIXES):
+ return FileType.DISALLOWED
+
+ search = re.search(analysis.extension_pattern(), str(path))
+ if search and search.group("metadata"):
+ return FileType.METADATA
+
+ if search and search.group("artifact") and (source_matcher is not None) and (base_path is not None):
+ if source_matcher(str(base_path / path)):
+ return FileType.SOURCE
+
+ return FileType.BINARY
diff --git a/atr/storage/readers/releases.py b/atr/storage/readers/releases.py
index f45539f2..e57a9786 100644
--- a/atr/storage/readers/releases.py
+++ b/atr/storage/readers/releases.py
@@ -20,9 +20,8 @@ from __future__ import annotations
import dataclasses
import pathlib
-import re
-import atr.analysis as analysis
+import atr.classify as classify
import atr.db as db
import atr.models.sql as sql
import atr.storage as storage
@@ -60,21 +59,13 @@ class GeneralPublic:
if latest_revision_number is None:
return None
await self.__successes_errors_warnings(release, latest_revision_number, info)
- for path in paths:
- # Get artifacts and metadata
- search = re.search(analysis.extension_pattern(), str(path))
- if search:
- if search.group("artifact"):
- info.artifacts.add(path)
- elif search.group("metadata"):
- info.metadata.add(path)
+ base_path = util.release_directory(release)
+ source_matcher = None
source_artifact_paths = release.project.policy_source_artifact_paths
- if source_artifact_paths and info.artifacts:
- base_path = util.release_directory(release)
- source_matcher = util.create_path_matcher(source_artifact_paths, base_path / ".ignore", base_path)
- for path in info.artifacts:
- if source_matcher(str(base_path / path)):
- info.sources.add(path)
+ if source_artifact_paths:
+ source_matcher = util.create_path_matcher(source_artifact_paths, None, base_path)
+ for path in paths:
+ info.file_types[path] = classify.classify(path, base_path=base_path, source_matcher=source_matcher)
self.__compute_checker_stats(info, paths)
return info
diff --git a/atr/storage/types.py b/atr/storage/types.py
index 7b957a3b..3cd74f6b 100644
--- a/atr/storage/types.py
+++ b/atr/storage/types.py
@@ -20,6 +20,7 @@ import enum
import pathlib
from collections.abc import Callable
+import atr.classify as classify
import atr.models.schema as schema
import atr.models.sql as sql
import atr.storage.outcome as outcome
@@ -63,13 +64,11 @@ class LinkedCommittee:
class PathInfo(schema.Strict):
- artifacts: set[pathlib.Path] = schema.factory(set)
checker_stats: list[CheckerStats] = schema.factory(list)
errors: dict[pathlib.Path, list[sql.CheckResult]] = schema.factory(dict)
+ file_types: dict[pathlib.Path, classify.FileType] = schema.factory(dict)
ignored_errors: list[sql.CheckResult] = schema.factory(list)
ignored_warnings: list[sql.CheckResult] = schema.factory(list)
- metadata: set[pathlib.Path] = schema.factory(set)
- sources: set[pathlib.Path] = schema.factory(set)
successes: dict[pathlib.Path, list[sql.CheckResult]] = schema.factory(dict)
warnings: dict[pathlib.Path, list[sql.CheckResult]] = schema.factory(dict)
diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py
index 08d2c4c6..1b78f68e 100644
--- a/atr/tasks/checks/__init__.py
+++ b/atr/tasks/checks/__init__.py
@@ -182,9 +182,7 @@ class Recorder:
project = await self.project()
if not project.policy_binary_artifact_paths:
return False
- matches = util.create_path_matcher(
- project.policy_binary_artifact_paths, self.abs_path_base() / ".ignore", self.abs_path_base()
- )
+ matches = util.create_path_matcher(project.policy_binary_artifact_paths, None, self.abs_path_base())
abs_path = await self.abs_path()
return matches(str(abs_path))
@@ -194,9 +192,7 @@ class Recorder:
project = await self.project()
if not project.policy_source_artifact_paths:
return False
- matches = util.create_path_matcher(
- project.policy_source_artifact_paths, self.abs_path_base() / ".ignore", self.abs_path_base()
- )
+ matches = util.create_path_matcher(project.policy_source_artifact_paths, None, self.abs_path_base())
abs_path = await self.abs_path()
return matches(str(abs_path))
diff --git a/atr/templates/check-selected-path-table.html b/atr/templates/check-selected-path-table.html
index 3b4f6096..fd960e0f 100644
--- a/atr/templates/check-selected-path-table.html
+++ b/atr/templates/check-selected-path-table.html
@@ -28,33 +28,12 @@
{% endif %}
<tr class="{{ row_bg_class }}">
- {#
- <td class="text-center px-1 py-2 page-icon-cell">
- {% if info and (path in info.artifacts) %}
- <i class="bi bi-archive {{ icon_class }}"
- title="Artifact"
- aria-label="Artifact"></i>
- {% elif info and (path in info.metadata) %}
- <i class="bi bi-file-earmark-text {{ icon_class }}"
- title="Metadata"
- aria-label="Metadata"></i>
- {% else %}
- <i class="bi bi-file-earmark {{ icon_class }}"
- title="File"
- aria-label="File"></i>
- {% endif %}
- </td>
- #}
<td class="text-center px-0 py-2 atr-sans {{ icon_class }}">
- {% if info and (path in info.sources) %}
- {% set file_type = "source" %}
- {% elif info and (path in info.metadata) %}
- {% set file_type = "metadata" %}
- {% else %}
- {% set file_type = "binary" %}
- {% endif %}
+ {% set file_type = info.file_types[path].value if (info and (path in info.file_types)) else None %}
- {% if file_type == "source" %}
+ {% if file_type == "disallowed" %}
+ <span title="Disallowed file">Ⓧ</span>
+ {% elif file_type == "source" %}
<span title="Source artifact">Ⓢ</span>
{% elif file_type == "metadata" %}
<span title="Metadata file">Ⓜ</span>
diff --git a/atr/templates/check-selected.html b/atr/templates/check-selected.html
index 0a92433c..3328b666 100644
--- a/atr/templates/check-selected.html
+++ b/atr/templates/check-selected.html
@@ -11,11 +11,6 @@
background-color: #eeeeee;
}
- .page-icon-cell {
- width: 2em;
- text-align: center;
- }
-
table tr {
border-bottom: none;
}
diff --git a/atr/util.py b/atr/util.py
index 2428a94c..22d25447 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -324,7 +324,9 @@ async def create_hard_link_clone(
await _clone_recursive(source_dir, dest_dir)
-def create_path_matcher(lines: Iterable[str], full_path: pathlib.Path, base_dir: pathlib.Path) -> Callable[[str], bool]:
+def create_path_matcher(
+ lines: Iterable[str], full_path: pathlib.Path | None, base_dir: pathlib.Path
+) -> Callable[[str], bool]:
rules = []
negation = False
for line_no, line in enumerate(lines, start=1):
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]