This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/sbp by this push:
     new 87b6591a Get classifications either from the database or by recomputing
87b6591a is described below

commit 87b6591a9f2f2e3d65e8d3b782ecf2f84f56558e
Author: Sean B. Palmer <[email protected]>
AuthorDate: Tue Mar 17 19:38:13 2026 +0000

    Get classifications either from the database or by recomputing
---
 atr/db/__init__.py              | 52 +++++++++++++++++++++++++++++++++++++++++
 atr/storage/readers/releases.py | 14 ++++++++---
 atr/tasks/checks/__init__.py    |  8 +++++++
 3 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/atr/db/__init__.py b/atr/db/__init__.py
index ebc47462..78c03332 100644
--- a/atr/db/__init__.py
+++ b/atr/db/__init__.py
@@ -569,6 +569,58 @@ class Session(sqlalchemy.ext.asyncio.AsyncSession):
 
         return Query(self, query)
 
+    async def release_file_classification_at(
+        self,
+        release_key: str,
+        path: str,
+        at_revision_seq: int,
+    ) -> str | None:
+        via = sql.validate_instrumented_attribute
+        query = (
+            sqlmodel.select(sql.ReleaseFileState)
+            .where(
+                sql.ReleaseFileState.release_key == release_key,
+                sql.ReleaseFileState.path == path,
+                via(sql.ReleaseFileState.since_revision_seq) <= at_revision_seq,
+            )
+            .order_by(via(sql.ReleaseFileState.since_revision_seq).desc())
+            .limit(1)
+        )
+        result = await self.execute(query)
+        row = result.scalar_one_or_none()
+        if (row is None) or (not row.present):
+            return None
+        return row.classification
+
+    async def release_file_classifications_at(
+        self,
+        release_key: str,
+        at_revision_seq: int,
+    ) -> dict[str, str]:
+        via = sql.validate_instrumented_attribute
+        query = (
+            sqlmodel.select(sql.ReleaseFileState)
+            .where(
+                sql.ReleaseFileState.release_key == release_key,
+                via(sql.ReleaseFileState.since_revision_seq) <= at_revision_seq,
+            )
+            .order_by(
+                sql.ReleaseFileState.path,
+                via(sql.ReleaseFileState.since_revision_seq).desc(),
+            )
+        )
+        result = await self.execute(query)
+        rows = result.scalars().all()
+        classifications: dict[str, str] = {}
+        seen: set[str] = set()
+        for row in rows:
+            if row.path in seen:
+                continue
+            seen.add(row.path)
+            if row.present and (row.classification is not None):
+                classifications[row.path] = row.classification
+        return classifications
+
     async def release_file_hash_at(
         self,
         release_key: str,
diff --git a/atr/storage/readers/releases.py b/atr/storage/readers/releases.py
index e17290f9..e24ca802 100644
--- a/atr/storage/readers/releases.py
+++ b/atr/storage/readers/releases.py
@@ -62,15 +62,23 @@ class GeneralPublic:
             return None
         await self.__successes_errors_warnings(release, release.safe_latest_revision_number, info)
         base_path = paths.release_directory(release)
+        revision_seq = int(str(release.safe_latest_revision_number))
+        db_classifications = await self.__data.release_file_classifications_at(release.key, revision_seq)
+        # TODO: This should get the matchers from attestable data policy
+        # But this branch is only a fallback for pre-AttestableV2 releases
         source_matcher, binary_matcher = classify.matchers_from_policy(
             release.project.policy_source_artifact_paths,
             release.project.policy_binary_artifact_paths,
             base_path,
         )
         for path in all_paths:
-            info.file_types[path] = classify.classify(
-                path, base_path=base_path, source_matcher=source_matcher, binary_matcher=binary_matcher
-            )
+            db_value = db_classifications.get(str(path))
+            if db_value is not None:
+                info.file_types[path] = classify.FileType(db_value)
+            else:
+                info.file_types[path] = classify.classify(
+                    path, base_path=base_path, source_matcher=source_matcher, binary_matcher=binary_matcher
+                )
         self.__compute_checker_stats(info, all_paths)
         return info
 
diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py
index 9065281f..0b8beaff 100644
--- a/atr/tasks/checks/__init__.py
+++ b/atr/tasks/checks/__init__.py
@@ -204,8 +204,16 @@ class Recorder:
     async def _classify_primary_path(self) -> classify.FileType:
         if self.primary_rel_path is None:
             return classify.FileType.BINARY
+        release_key = str(self.release_key)
+        revision_seq = int(str(self.revision_number))
+        async with db.session() as data:
+            classification = await data.release_file_classification_at(release_key, self.primary_rel_path, revision_seq)
+        if classification is not None:
+            return classify.FileType(classification)
         project = await self.project()
         base_path = self.abs_path_base()
+        # TODO: This should get the matchers from attestable data policy
+        # But this branch is only a fallback for pre-AttestableV2 releases
         source_matcher, binary_matcher = classify.matchers_from_policy(
             project.policy_source_artifact_paths,
             project.policy_binary_artifact_paths,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to