This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git

commit 472d371ea04117f328bfc9612dac2069ebbe351d
Author: Sean B. Palmer <[email protected]>
AuthorDate: Tue Mar 17 19:18:11 2026 +0000

    Read hashes either from the database or by recomputing from disk
---
 atr/db/__init__.py                 | 52 +++++++++++++++++++++++++
 atr/merge.py                       | 77 ++++++++++++++++++++++++--------------
 atr/storage/writers/revision.py    |  3 +-
 atr/tasks/checks/__init__.py       | 24 +++++++++---
 tests/unit/test_create_revision.py |  3 +-
 tests/unit/test_merge.py           | 59 ++++++++++++++++++-----------
 6 files changed, 159 insertions(+), 59 deletions(-)

diff --git a/atr/db/__init__.py b/atr/db/__init__.py
index 41f68796..ebc47462 100644
--- a/atr/db/__init__.py
+++ b/atr/db/__init__.py
@@ -569,6 +569,58 @@ class Session(sqlalchemy.ext.asyncio.AsyncSession):
 
         return Query(self, query)
 
+    async def release_file_hash_at(
+        self,
+        release_key: str,
+        path: str,
+        at_revision_seq: int,
+    ) -> str | None:
+        via = sql.validate_instrumented_attribute
+        query = (
+            sqlmodel.select(sql.ReleaseFileState)
+            .where(
+                sql.ReleaseFileState.release_key == release_key,
+                sql.ReleaseFileState.path == path,
+                via(sql.ReleaseFileState.since_revision_seq) <= at_revision_seq,
+            )
+            .order_by(via(sql.ReleaseFileState.since_revision_seq).desc())
+            .limit(1)
+        )
+        result = await self.execute(query)
+        row = result.scalar_one_or_none()
+        if (row is None) or (not row.present):
+            return None
+        return row.content_hash
+
+    async def release_file_hashes_at(
+        self,
+        release_key: str,
+        at_revision_seq: int,
+    ) -> dict[str, str]:
+        via = sql.validate_instrumented_attribute
+        query = (
+            sqlmodel.select(sql.ReleaseFileState)
+            .where(
+                sql.ReleaseFileState.release_key == release_key,
+                via(sql.ReleaseFileState.since_revision_seq) <= at_revision_seq,
+            )
+            .order_by(
+                sql.ReleaseFileState.path,
+                via(sql.ReleaseFileState.since_revision_seq).desc(),
+            )
+        )
+        result = await self.execute(query)
+        rows = result.scalars().all()
+        hashes: dict[str, str] = {}
+        seen: set[str] = set()
+        for row in rows:
+            if row.path in seen:
+                continue
+            seen.add(row.path)
+            if row.present and (row.content_hash is not None):
+                hashes[row.path] = row.content_hash
+        return hashes
+
     def release_file_state(
         self,
         release_key: Opt[str] = NOT_SET,
diff --git a/atr/merge.py b/atr/merge.py
index 8f929172..c9b4be66 100644
--- a/atr/merge.py
+++ b/atr/merge.py
@@ -23,9 +23,10 @@ from typing import TYPE_CHECKING
 
 import aiofiles.os
 
-import atr.attestable as attestable
+import atr.db as db
 import atr.hashes as hashes
 import atr.models.safe as safe
+import atr.models.sql as sql
 import atr.util as util
 
 if TYPE_CHECKING:
@@ -33,12 +34,13 @@ if TYPE_CHECKING:
 
 
 async def merge(
+    data: db.Session,
     base_inodes: dict[str, int],
     base_hashes: dict[str, str],
     prior_dir: pathlib.Path,
     project_key: safe.ProjectKey,
     version_key: safe.VersionKey,
-    prior_revision_number: safe.RevisionNumber,
+    prior_revision_seq: int,
     temp_dir: pathlib.Path,
     n_inodes: dict[str, int],
     n_hashes: dict[str, str],
@@ -48,6 +50,7 @@ async def merge(
     # This happens in the _add_from_prior and _replace_with_prior calls somewhat below
     prior_inodes = await asyncio.to_thread(util.paths_to_inodes, prior_dir)
     prior_hashes: dict[str, str] | None = None
+    release_key = str(sql.release_key(project_key, version_key))
 
     # Collect implicit directory paths from new (N) files for type conflict detection
     n_dirs: set[str] = set()
@@ -70,15 +73,15 @@ async def merge(
             if await aiofiles.os.path.isdir(temp_dir / path):
                 continue
             prior_hashes = await _add_from_prior(
+                data,
                 prior_dir,
                 temp_dir,
                 path,
                 n_hashes,
                 n_sizes,
                 prior_hashes,
-                project_key,
-                version_key,
-                prior_revision_number,
+                release_key,
+                prior_revision_seq,
             )
             continue
 
@@ -96,6 +99,7 @@ async def merge(
         # Cases 4, 5, 6, 8, 11, and 15: all three revisions have this path
         if (b_ino is not None) and (p_ino is not None) and (n_ino is not None):
             prior_hashes = await _merge_all_present(
+                data,
                 base_inodes,
                 base_hashes,
                 prior_dir,
@@ -107,30 +111,29 @@ async def merge(
                 n_hashes,
                 n_sizes,
                 prior_hashes,
-                project_key,
-                version_key,
-                prior_revision_number,
+                release_key,
+                prior_revision_seq,
             )
 
 
 async def _add_from_prior(
+    data: db.Session,
     prior_dir: pathlib.Path,
     temp_dir: pathlib.Path,
     path: str,
     n_hashes: dict[str, str],
     n_sizes: dict[str, int],
     prior_hashes: dict[str, str] | None,
-    project_key: safe.ProjectKey,
-    version_key: safe.VersionKey,
-    prior_revision_number: safe.RevisionNumber,
+    release_key: str,
+    prior_revision_seq: int,
 ) -> dict[str, str] | None:
     target = temp_dir / path
     await asyncio.to_thread(_makedirs_with_permissions, target.parent, temp_dir)
     await aiofiles.os.link(prior_dir / path, target)
     if prior_hashes is None:
-        prior_hashes = await attestable.load_paths(project_key, version_key, prior_revision_number)
+        prior_hashes = await _prior_hashes_load(data, release_key, prior_revision_seq, prior_dir)
     # Update n_hashes and n_sizes in place
-    if (prior_hashes is not None) and (path in prior_hashes):
+    if path in prior_hashes:
         n_hashes[path] = prior_hashes[path]
     else:
         n_hashes[path] = await hashes.compute_file_hash(target)
@@ -166,6 +169,7 @@ def _makedirs_with_permissions(target_parent: pathlib.Path, root: pathlib.Path)
 
 
 async def _merge_all_present(
+    data: db.Session,
     _base_inodes: dict[str, int],
     base_hashes: dict[str, str],
     prior_dir: pathlib.Path,
@@ -177,9 +181,8 @@ async def _merge_all_present(
     n_hashes: dict[str, str],
     n_sizes: dict[str, int],
     prior_hashes: dict[str, str] | None,
-    project_key: safe.ProjectKey,
-    version_key: safe.VersionKey,
-    prior_revision_number: safe.RevisionNumber,
+    release_key: str,
+    prior_revision_seq: int,
 ) -> dict[str, str] | None:
     # Cases 6, 8: prior and new share an inode so they already agree
     if p_ino == n_ino:
@@ -192,15 +195,15 @@ async def _merge_all_present(
     # Case 11 via inode: base and new share an inode so prior wins
     if b_ino == n_ino:
         return await _replace_with_prior(
+            data,
             prior_dir,
             temp_dir,
             path,
             n_hashes,
             n_sizes,
             prior_hashes,
-            project_key,
-            version_key,
-            prior_revision_number,
+            release_key,
+            prior_revision_seq,
         )
 
     # Cases 4, 5, 8, 11, 15: all inodes differ, so use hash to distinguish
@@ -208,47 +211,63 @@ async def _merge_all_present(
     n_hash = n_hashes[path]
     if b_hash == n_hash:
         if prior_hashes is None:
-            prior_hashes = await attestable.load_paths(project_key, version_key, prior_revision_number)
-        if (prior_hashes is not None) and (path in prior_hashes):
+            prior_hashes = await _prior_hashes_load(data, release_key, prior_revision_seq, prior_dir)
+        if path in prior_hashes:
             p_hash = prior_hashes[path]
         else:
             p_hash = await hashes.compute_file_hash(prior_dir / path)
         if p_hash != b_hash:
             # Case 11 via hash: base and new have the same content but prior differs
             return await _replace_with_prior(
+                data,
                 prior_dir,
                 temp_dir,
                 path,
                 n_hashes,
                 n_sizes,
                 prior_hashes,
-                project_key,
-                version_key,
-                prior_revision_number,
+                release_key,
+                prior_revision_seq,
             )
 
     # Cases 4, 5, 8, 15: no merge action needed so new wins
     return prior_hashes
 
 
+async def _prior_hashes_load(
+    data: db.Session,
+    release_key: str,
+    prior_revision_seq: int,
+    prior_dir: pathlib.Path,
+) -> dict[str, str]:
+    result = await data.release_file_hashes_at(release_key, prior_revision_seq)
+    if result:
+        return result
+    prior_hashes: dict[str, str] = {}
+    # Slow, but only applies to pre-AttestableV2 release revisions
+    async for rel_path in util.paths_recursive(prior_dir):
+        prior_hashes[str(rel_path)] = await hashes.compute_file_hash(prior_dir / rel_path)
+    return prior_hashes
+
+
 async def _replace_with_prior(
+    data: db.Session,
     prior_dir: pathlib.Path,
     temp_dir: pathlib.Path,
     path: str,
     n_hashes: dict[str, str],
     n_sizes: dict[str, int],
     prior_hashes: dict[str, str] | None,
-    project_key: safe.ProjectKey,
-    version_key: safe.VersionKey,
-    prior_revision_number: safe.RevisionNumber,
+    release_key: str,
+    prior_revision_seq: int,
 ) -> dict[str, str] | None:
     await aiofiles.os.remove(temp_dir / path)
     await aiofiles.os.link(prior_dir / path, temp_dir / path)
     if prior_hashes is None:
-        prior_hashes = await attestable.load_paths(project_key, version_key, prior_revision_number)
+        prior_hashes = await _prior_hashes_load(data, release_key, prior_revision_seq, prior_dir)
     # Update n_hashes and n_sizes in place
     file_path = temp_dir / path
-    if (prior_hashes is not None) and (path in prior_hashes):
+    if path in prior_hashes:
         n_hashes[path] = prior_hashes[path]
     else:
         n_hashes[path] = await hashes.compute_file_hash(file_path)
diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py
index 9a10f118..51809f7c 100644
--- a/atr/storage/writers/revision.py
+++ b/atr/storage/writers/revision.py
@@ -287,12 +287,13 @@ async def _lock_and_merge(
         prior_number = latest.safe_number
         prior_dir = paths.release_directory_base(merged_release) / str(prior_number)
         await merge.merge(
+            data,
             base_inodes,
             base_hashes,
             prior_dir,
             project_key,
             version_key,
-            prior_number,
+            latest.seq,
             temp_dir_path,
             n_inodes,
             path_to_hash,
diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py
index a488d3f5..9065281f 100644
--- a/atr/tasks/checks/__init__.py
+++ b/atr/tasks/checks/__init__.py
@@ -324,10 +324,16 @@ async def resolve_archive_dir(args: FunctionArguments) -> pathlib.Path | None:
     """Resolve the extracted archive directory for the primary archive."""
     if args.primary_rel_path is None:
         return None
-    paths_data = await attestable.load_paths(args.project_key, args.version_key, args.revision_number)
-    if paths_data is None:
-        return None
-    content_hash = paths_data.get(args.primary_rel_path)
+    release_key = sql.release_key(str(args.project_key), str(args.version_key))
+    revision_seq = int(str(args.revision_number))
+    async with db.session() as data:
+        content_hash = await data.release_file_hash_at(release_key, args.primary_rel_path, revision_seq)
+    if content_hash is None:
+        abs_path = file_paths.revision_path_for_file(
+            args.project_key, args.version_key, args.revision_number, args.primary_rel_path
+        )
+        if await aiofiles.os.path.isfile(abs_path):
+            content_hash = await hashes.compute_file_hash(abs_path)
     if content_hash is None:
         return None
     archive_key = hashes.filesystem_archives_key(content_hash)
@@ -337,7 +343,7 @@ async def resolve_archive_dir(args: FunctionArguments) -> pathlib.Path | None:
     return None
 
 
-async def resolve_cache_key(
+async def resolve_cache_key(  # noqa: C901
     checker: str | Callable[..., Any],
     checker_version: str,
     policy_keys: list[str],
@@ -361,7 +367,13 @@ async def resolve_cache_key(
     else:
         # TODO: Is this fallback valid / necessary? Or should we bail out if there's no attestable data?
         policy = release.release_policy or release.project.release_policy
-        if not ignore_path:
+    if not ignore_path:
+        if file:
+            release_key = sql.release_key(str(release.safe_project_key), str(release.safe_version_key))
+            revision_seq = int(str(revision))
+            async with db.session() as data:
+                file_hash = await data.release_file_hash_at(release_key, file, revision_seq)
+        if file_hash is None:
             if path is None:
                 path = file_paths.revision_path_for_file(
                     release.safe_project_key, release.safe_version_key, revision, file or ""
diff --git a/tests/unit/test_create_revision.py b/tests/unit/test_create_revision.py
index 4c1d945e..a377baa7 100644
--- a/tests/unit/test_create_revision.py
+++ b/tests/unit/test_create_revision.py
@@ -209,6 +209,7 @@ async def test_intervening_revision_triggers_merge_and_uses_latest_parent(tmp_pa
     intervening_revision = mock.MagicMock()
     intervening_revision.key = f"{release_key} 00006"
     intervening_revision.number = "00006"
+    intervening_revision.seq = 6
     intervening_revision.safe_number = safe.RevisionNumber("00006")
 
     first_attestable = mock.MagicMock(paths={"dist/a.tar.gz": "h1"})
@@ -259,7 +260,7 @@ async def test_intervening_revision_triggers_merge_and_uses_latest_parent(tmp_pa
 
     merge_await_args = merge_mock.await_args
     assert merge_await_args is not None
-    assert merge_await_args.args[5] == safe.RevisionNumber("00006")
+    assert merge_await_args.args[6] == 6
 
 
 @pytest.mark.asyncio
diff --git a/tests/unit/test_merge.py b/tests/unit/test_merge.py
index 46fd2a82..61adbc0c 100644
--- a/tests/unit/test_merge.py
+++ b/tests/unit/test_merge.py
@@ -39,14 +39,15 @@ async def test_case_09_prior_adds_file(tmp_path: 
pathlib.Path):
 
     mock_prior_hashes = {"added.txt": "blake3:abc123"}
 
-    with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value=mock_prior_hashes):
+    with mock.patch("atr.merge._prior_hashes_load", 
new_callable=mock.AsyncMock, return_value=mock_prior_hashes):
         await merge.merge(
+            mock.AsyncMock(),
             base_inodes,
             base_hashes,
             prior_dir,
             "proj",
             "ver",
-            "00002",
+            2,
             temp_dir,
             n_inodes,
             n_hashes,
@@ -74,14 +75,15 @@ async def 
test_case_09_prior_adds_file_in_subdirectory(tmp_path: pathlib.Path):
 
     mock_prior_hashes = {"apple/banana.txt": "blake3:xyz890"}
 
-    with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value=mock_prior_hashes):
+    with mock.patch("atr.merge._prior_hashes_load", 
new_callable=mock.AsyncMock, return_value=mock_prior_hashes):
         await merge.merge(
+            mock.AsyncMock(),
             base_inodes,
             base_hashes,
             prior_dir,
             "proj",
             "ver",
-            "00002",
+            2,
             temp_dir,
             n_inodes,
             n_hashes,
@@ -108,14 +110,15 @@ async def test_case_10_prior_deletion_via_hash(tmp_path: 
pathlib.Path):
     n_hashes = {"removed.txt": "blake3:matching"}
     n_sizes = {"removed.txt": len("same content")}
 
-    with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value={}):
+    with mock.patch("atr.merge._prior_hashes_load", 
new_callable=mock.AsyncMock, return_value={}):
         await merge.merge(
+            mock.AsyncMock(),
             base_inodes,
             base_hashes,
             prior_dir,
             "proj",
             "ver",
-            "00002",
+            2,
             temp_dir,
             n_inodes,
             n_hashes,
@@ -140,14 +143,15 @@ async def test_case_10_prior_deletion_via_inode(tmp_path: 
pathlib.Path):
     n_hashes = {"removed.txt": "blake3:aaa"}
     n_sizes = {"removed.txt": len("to be deleted")}
 
-    with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value={}):
+    with mock.patch("atr.merge._prior_hashes_load", 
new_callable=mock.AsyncMock, return_value={}):
         await merge.merge(
+            mock.AsyncMock(),
             base_inodes,
             base_hashes,
             prior_dir,
             "proj",
             "ver",
-            "00002",
+            2,
             temp_dir,
             n_inodes,
             n_hashes,
@@ -181,14 +185,15 @@ async def 
test_case_11_prior_replacement_via_hash(tmp_path: pathlib.Path):
 
     mock_prior_hashes = {"shared.txt": "blake3:updated"}
 
-    with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value=mock_prior_hashes):
+    with mock.patch("atr.merge._prior_hashes_load", 
new_callable=mock.AsyncMock, return_value=mock_prior_hashes):
         await merge.merge(
+            mock.AsyncMock(),
             base_inodes,
             base_hashes,
             prior_dir,
             "proj",
             "ver",
-            "00002",
+            2,
             temp_dir,
             n_inodes,
             n_hashes,
@@ -216,14 +221,15 @@ async def 
test_case_11_prior_replacement_via_inode(tmp_path: pathlib.Path):
 
     mock_prior_hashes = {"shared.txt": "blake3:updated"}
 
-    with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value=mock_prior_hashes):
+    with mock.patch("atr.merge._prior_hashes_load", 
new_callable=mock.AsyncMock, return_value=mock_prior_hashes):
         await merge.merge(
+            mock.AsyncMock(),
             base_inodes,
             base_hashes,
             prior_dir,
             "proj",
             "ver",
-            "00002",
+            2,
             temp_dir,
             n_inodes,
             n_hashes,
@@ -248,14 +254,15 @@ async def 
test_case_13_new_wins_when_prior_deletes(tmp_path: pathlib.Path):
     n_hashes = {"modified.txt": "blake3:new"}
     n_sizes = {"modified.txt": len("new content")}
 
-    with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value={}):
+    with mock.patch("atr.merge._prior_hashes_load", 
new_callable=mock.AsyncMock, return_value={}):
         await merge.merge(
+            mock.AsyncMock(),
             base_inodes,
             base_hashes,
             prior_dir,
             "proj",
             "ver",
-            "00002",
+            2,
             temp_dir,
             n_inodes,
             n_hashes,
@@ -282,14 +289,15 @@ async def test_noop_when_base_and_prior_agree(tmp_path: 
pathlib.Path):
     n_hashes = {"unchanged.txt": "blake3:modified"}
     n_sizes = {"unchanged.txt": len("modified by new")}
 
-    with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value={}) as mock_load:
+    with mock.patch("atr.merge._prior_hashes_load", 
new_callable=mock.AsyncMock, return_value={}) as mock_load:
         await merge.merge(
+            mock.AsyncMock(),
             base_inodes,
             base_hashes,
             prior_dir,
             "proj",
             "ver",
-            "00002",
+            2,
             temp_dir,
             n_inodes,
             n_hashes,
@@ -315,14 +323,15 @@ async def 
test_type_conflict_prior_file_vs_new_directory(tmp_path: pathlib.Path)
     n_hashes = {"docs/guide.txt": "blake3:guide"}
     n_sizes = {"docs/guide.txt": len("a file under a directory in new")}
 
-    with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value={"docs": "blake3:docs"}):
+    with mock.patch("atr.merge._prior_hashes_load", 
new_callable=mock.AsyncMock, return_value={"docs": "blake3:docs"}):
         await merge.merge(
+            mock.AsyncMock(),
             base_inodes,
             base_hashes,
             prior_dir,
             "proj",
             "ver",
-            "00002",
+            2,
             temp_dir,
             n_inodes,
             n_hashes,
@@ -348,14 +357,17 @@ async def 
test_type_conflict_prior_file_vs_new_empty_directory(tmp_path: pathlib
     n_hashes: dict[str, str] = {}
     n_sizes: dict[str, int] = {}
 
-    with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value={"empty": "blake3:empty"}):
+    with mock.patch(
+        "atr.merge._prior_hashes_load", new_callable=mock.AsyncMock, 
return_value={"empty": "blake3:empty"}
+    ):
         await merge.merge(
+            mock.AsyncMock(),
             base_inodes,
             base_hashes,
             prior_dir,
             "proj",
             "ver",
-            "00002",
+            2,
             temp_dir,
             n_inodes,
             n_hashes,
@@ -381,15 +393,18 @@ async def 
test_type_conflict_prior_subdir_vs_new_file(tmp_path: pathlib.Path):
     n_sizes = {"docs": len("a file in new")}
 
     with mock.patch(
-        "atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value={"docs/guide.txt": "blake3:guide"}
+        "atr.merge._prior_hashes_load",
+        new_callable=mock.AsyncMock,
+        return_value={"docs/guide.txt": "blake3:guide"},
     ):
         await merge.merge(
+            mock.AsyncMock(),
             base_inodes,
             base_hashes,
             prior_dir,
             "proj",
             "ver",
-            "00002",
+            2,
             temp_dir,
             n_inodes,
             n_hashes,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to