This is an automated email from the ASF dual-hosted git repository.

arm pushed a commit to branch check_caching
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git

commit 448abfdf201e1dcf10ce12c6954e806fc2bc8617
Author: Alastair McFarlane <[email protected]>
AuthorDate: Fri Feb 13 18:06:54 2026 +0000

    Read and write checks to/from attestable data
---
 atr/storage/readers/checks.py | 13 ++------
 atr/tasks/__init__.py         | 43 +++++++++++++++-----------
 atr/tasks/checks/__init__.py  | 70 ++++++++++++++++++++++++-------------------
 atr/tasks/checks/paths.py     | 36 +++++++---------------
 4 files changed, 79 insertions(+), 83 deletions(-)

diff --git a/atr/storage/readers/checks.py b/atr/storage/readers/checks.py
index 4edbb7ce..1a95df2c 100644
--- a/atr/storage/readers/checks.py
+++ b/atr/storage/readers/checks.py
@@ -54,7 +54,7 @@ async def _filter_check_results_by_hash(
             except (ImportError, AttributeError):
                 policy_keys = []
                 extra_arg_names = []
-            extra_args = checks.resolve_extra_args(extra_arg_names, release)
+            extra_args = await checks.resolve_extra_args(extra_arg_names, 
release)
             cache_key = await checks.resolve_cache_key(
                 cr.checker, policy_keys, release, 
release.latest_revision_number, extra_args, file=rel_path.name
             )
@@ -97,20 +97,11 @@ class GeneralPublic:
             else []
         )
 
-        # Filter to checks for the current file version / policy
-        # Cache the computed input hash per checker module, since all results 
here share the same file and release
-        input_hash_by_module: dict[str, str | None] = {}
-        # TODO: This has a bug - create an archive, it'll scan with a hash and 
show missing checksum.
-        # Then generate a checksum. It'll re-scan the file with the same hash, 
but now has one. Two checks shown.
-        filtered_check_results = await _filter_check_results_by_hash(
-            all_check_results, rel_path, input_hash_by_module, release
-        )
-
         # Filter out any results that are ignored
         unignored_checks = []
         ignored_checks = []
         match_ignore = await self.ignores_matcher(release.project_name)
-        for cr in filtered_check_results:
+        for cr in all_check_results:
             if not match_ignore(cr):
                 unignored_checks.append(cr)
             else:
diff --git a/atr/tasks/__init__.py b/atr/tasks/__init__.py
index cb234a31..00beb6d7 100644
--- a/atr/tasks/__init__.py
+++ b/atr/tasks/__init__.py
@@ -69,7 +69,7 @@ async def asc_checks(
                     signature.INPUT_POLICY_KEYS,
                     release,
                     revision,
-                    checks.resolve_extra_args(signature.INPUT_EXTRA_ARGS, 
release),
+                    await 
checks.resolve_extra_args(signature.INPUT_EXTRA_ARGS, release),
                 ),
                 extra_args={"committee_name": release.committee.name},
             )
@@ -177,11 +177,20 @@ async def draft_checks(
             release,
             revision_number,
             caller_data,
+            check_cache_key=await checks.resolve_cache_key(
+                resolve(sql.TaskType.PATHS_CHECK),
+                paths.INPUT_POLICY_KEYS,
+                release,
+                revision_number,
+                await checks.resolve_extra_args(paths.INPUT_EXTRA_ARGS, 
release),
+                ignore_path=True,
+            ),
             extra_args={"is_podling": is_podling},
         )
-        data.add(path_check_task)
-        if caller_data is None:
-            await data.commit()
+        if path_check_task:
+            data.add(path_check_task)
+            if caller_data is None:
+                await data.commit()
 
     return len(relative_paths)
 
@@ -386,7 +395,7 @@ async def sha_checks(
                 hashing.INPUT_POLICY_KEYS,
                 release,
                 revision,
-                checks.resolve_extra_args(hashing.INPUT_EXTRA_ARGS, release),
+                await checks.resolve_extra_args(hashing.INPUT_EXTRA_ARGS, 
release),
                 file=hash_file,
             ),
         )
@@ -406,7 +415,7 @@ async def tar_gz_checks(
         compare.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release),
         file=path,
     )
     license_h_ck = await checks.resolve_cache_key(
@@ -414,7 +423,7 @@ async def tar_gz_checks(
         license.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
         file=path,
     )
     license_f_ck = await checks.resolve_cache_key(
@@ -422,7 +431,7 @@ async def tar_gz_checks(
         license.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
         file=path,
     )
     rat_ck = await checks.resolve_cache_key(
@@ -430,7 +439,7 @@ async def tar_gz_checks(
         rat.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
         file=path,
     )
     targz_i_ck = await checks.resolve_cache_key(
@@ -438,7 +447,7 @@ async def tar_gz_checks(
         targz.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
         file=path,
     )
     targz_s_ck = await checks.resolve_cache_key(
@@ -446,7 +455,7 @@ async def tar_gz_checks(
         targz.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
         file=path,
     )
     tasks = [
@@ -508,7 +517,7 @@ async def zip_checks(
         compare.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release),
         file=path,
     )
     license_h_ck = await checks.resolve_cache_key(
@@ -516,7 +525,7 @@ async def zip_checks(
         license.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
         file=path,
     )
     license_f_ck = await checks.resolve_cache_key(
@@ -524,7 +533,7 @@ async def zip_checks(
         license.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
         file=path,
     )
     rat_ck = await checks.resolve_cache_key(
@@ -532,7 +541,7 @@ async def zip_checks(
         rat.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
         file=path,
     )
     zip_i_ck = await checks.resolve_cache_key(
@@ -540,7 +549,7 @@ async def zip_checks(
         zipformat.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
         file=path,
     )
     zip_s_ck = await checks.resolve_cache_key(
@@ -548,7 +557,7 @@ async def zip_checks(
         zipformat.INPUT_POLICY_KEYS,
         release,
         revision,
-        checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
+        await checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
         file=path,
     )
 
diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py
index c5b795ee..8b0d2830 100644
--- a/atr/tasks/checks/__init__.py
+++ b/atr/tasks/checks/__init__.py
@@ -118,7 +118,6 @@ class Recorder:
         data: Any,
         primary_rel_path: str | None = None,
         member_rel_path: str | None = None,
-        inputs_hash: str | None = None,
     ) -> sql.CheckResult:
         if self.constructed is False:
             raise RuntimeError("Cannot add check result to a recorder that has 
not been constructed")
@@ -144,7 +143,7 @@ class Recorder:
             message=message,
             data=data,
             cached=False,
-            inputs_hash=inputs_hash or self.__input_hash,
+            inputs_hash=self.input_hash,
         )
 
         # It would be more efficient to keep a session open
@@ -198,11 +197,10 @@ class Recorder:
         abs_path = await self.abs_path()
         return matches(str(abs_path))
 
-    async def cache_key_set(self, policy_keys: list[str], input_args: 
list[str] | None = None) -> bool:
+    async def cache_key_set(
+        self, policy_keys: list[str], input_args: list[str] | None = None, 
checker: str | None = None
+    ) -> bool:
         # TODO: Should this just be in the constructor?
-        path = await self.abs_path()
-        if (not path) or (not await aiofiles.os.path.isfile(path)):
-            return False
 
         if config.get().DISABLE_CHECK_CACHE:
             return False
@@ -218,9 +216,9 @@ class Recorder:
             release = await data.release(
                 name=self.release_name, _release_policy=True, 
_project_release_policy=True, _project=True
             ).demand(RuntimeError(f"Release {self.release_name} not found"))
-            args = resolve_extra_args(input_args or [], release)
+            args = await resolve_extra_args(input_args or [], release)
             cache_key = await resolve_cache_key(
-                self.checker, policy_keys, release, self.revision_number, 
args, file=self.primary_rel_path
+                checker or self.checker, policy_keys, release, 
self.revision_number, args, file=self.primary_rel_path
             )
             self.__input_hash = hashes.compute_dict_hash(cache_key) if 
cache_key else None
         return True
@@ -251,7 +249,6 @@ class Recorder:
         data: Any,
         primary_rel_path: str | None = None,
         member_rel_path: str | None = None,
-        inputs_hash: str | None = None,
     ) -> sql.CheckResult:
         result = await self._add(
             sql.CheckResultStatus.BLOCKER,
@@ -259,7 +256,6 @@ class Recorder:
             data,
             primary_rel_path=primary_rel_path,
             member_rel_path=member_rel_path,
-            inputs_hash=inputs_hash,
         )
         await attestable.write_checks_data(self.project_name, 
self.version_name, self.revision_number, [result.id])
         return result
@@ -270,7 +266,6 @@ class Recorder:
         data: Any,
         primary_rel_path: str | None = None,
         member_rel_path: str | None = None,
-        inputs_hash: str | None = None,
     ) -> sql.CheckResult:
         result = await self._add(
             sql.CheckResultStatus.EXCEPTION,
@@ -278,7 +273,6 @@ class Recorder:
             data,
             primary_rel_path=primary_rel_path,
             member_rel_path=member_rel_path,
-            inputs_hash=inputs_hash,
         )
         await attestable.write_checks_data(self.project_name, 
self.version_name, self.revision_number, [result.id])
         return result
@@ -289,7 +283,6 @@ class Recorder:
         data: Any,
         primary_rel_path: str | None = None,
         member_rel_path: str | None = None,
-        inputs_hash: str | None = None,
     ) -> sql.CheckResult:
         result = await self._add(
             sql.CheckResultStatus.FAILURE,
@@ -297,7 +290,6 @@ class Recorder:
             data,
             primary_rel_path=primary_rel_path,
             member_rel_path=member_rel_path,
-            inputs_hash=inputs_hash,
         )
         await attestable.write_checks_data(self.project_name, 
self.version_name, self.revision_number, [result.id])
         return result
@@ -308,7 +300,6 @@ class Recorder:
         data: Any,
         primary_rel_path: str | None = None,
         member_rel_path: str | None = None,
-        inputs_hash: str | None = None,
     ) -> sql.CheckResult:
         result = await self._add(
             sql.CheckResultStatus.SUCCESS,
@@ -316,7 +307,6 @@ class Recorder:
             data,
             primary_rel_path=primary_rel_path,
             member_rel_path=member_rel_path,
-            inputs_hash=inputs_hash,
         )
         await attestable.write_checks_data(self.project_name, 
self.version_name, self.revision_number, [result.id])
         return result
@@ -339,7 +329,6 @@ class Recorder:
         data: Any,
         primary_rel_path: str | None = None,
         member_rel_path: str | None = None,
-        inputs_hash: str | None = None,
     ) -> sql.CheckResult:
         result = await self._add(
             sql.CheckResultStatus.WARNING,
@@ -347,7 +336,6 @@ class Recorder:
             data,
             primary_rel_path=primary_rel_path,
             member_rel_path=member_rel_path,
-            inputs_hash=inputs_hash,
         )
         await attestable.write_checks_data(self.project_name, 
self.version_name, self.revision_number, [result.id])
         return result
@@ -365,23 +353,26 @@ async def resolve_cache_key(
     args: dict[str, Any] | None = None,
     file: str | None = None,
     path: pathlib.Path | None = None,
+    ignore_path: bool = False,
 ) -> dict[str, Any] | None:
-    if file is None and path is None:
-        raise ValueError("Must specify either file or path")
     if not args:
         args = {}
+    cache_key = {"checker": function_key(checker)}
+    file_hash = None
     attestable_data = await attestable.load(release.project_name, 
release.version, revision)
     if attestable_data:
         policy = sql.ReleasePolicy.model_validate(attestable_data.policy)
-        file_hash = attestable_data.paths[file or ""]
+        if not ignore_path:
+            file_hash = attestable_data.paths.get(file) if file else None
     else:
         # TODO: Is this fallback valid / necessary? Or should we bail out if 
there's no attestable data?
         policy = release.release_policy or release.project.release_policy
-        if path is None:
-            # We know file isn't None here but type checker doesn't
-            path = file_paths.revision_path_for_file(release.project_name, 
release.version, revision, file or "")
-        file_hash = await hashes.compute_file_hash(path)
-    cache_key = {"file_hash": file_hash, "checker": function_key(checker)}
+        if not ignore_path:
+            if path is None:
+                path = file_paths.revision_path_for_file(release.project_name, 
release.version, revision, file or "")
+            file_hash = await hashes.compute_file_hash(path)
+    if file_hash:
+        cache_key["file_hash"] = file_hash
 
     if len(policy_keys) > 0 and policy is not None:
         policy_dict = policy.model_dump(exclude_none=True)
@@ -390,7 +381,7 @@ async def resolve_cache_key(
         return {**cache_key, **args}
 
 
-def resolve_extra_args(arg_names: list[str], release: sql.Release) -> 
dict[str, Any]:
+async def resolve_extra_args(arg_names: list[str], release: sql.Release) -> 
dict[str, Any]:
     result: dict[str, Any] = {}
     for name in arg_names:
         resolver = _EXTRA_ARG_RESOLVERS.get(name, None)
@@ -398,7 +389,7 @@ def resolve_extra_args(arg_names: list[str], release: 
sql.Release) -> dict[str,
         if resolver is None:
             log.warning(f"Unknown extra arg resolver: {name}")
             return {}
-        result[name] = resolver(release)
+        result[name] = await resolver(release)
     return result
 
 
@@ -416,17 +407,36 @@ def with_model(cls: type[schema.Strict]) -> 
Callable[[Callable[..., Any]], Calla
     return decorator
 
 
-def _resolve_is_podling(release: sql.Release) -> bool:
+async def _resolve_all_files(release: sql.Release) -> list[str]:
+    if not release.latest_revision_number:
+        return []
+    if not (
+        base_path := file_paths.base_path_for_revision(
+            release.project_name, release.version, 
release.latest_revision_number
+        )
+    ):
+        return []
+
+    if not await aiofiles.os.path.isdir(base_path):
+        log.error(f"Base release directory does not exist or is not a 
directory: {base_path}")
+        return []
+    relative_paths = [p async for p in util.paths_recursive(base_path)]
+    relative_paths_set = set(str(p) for p in relative_paths)
+    return list(relative_paths_set)
+
+
+async def _resolve_is_podling(release: sql.Release) -> bool:
     return (release.committee is not None) and release.committee.is_podling
 
 
-def _resolve_committee_name(release: sql.Release) -> str:
+async def _resolve_committee_name(release: sql.Release) -> str:
     if release.committee is None:
         raise ValueError("Release has no committee")
     return release.committee.name
 
 
 _EXTRA_ARG_RESOLVERS: Final[dict[str, Callable[[sql.Release], Any]]] = {
+    "all_files": _resolve_all_files,
     "is_podling": _resolve_is_podling,
     "committee_name": _resolve_committee_name,
 }
diff --git a/atr/tasks/checks/paths.py b/atr/tasks/checks/paths.py
index 16bd10bc..d37fc66b 100644
--- a/atr/tasks/checks/paths.py
+++ b/atr/tasks/checks/paths.py
@@ -18,12 +18,11 @@
 import asyncio
 import pathlib
 import re
-from typing import Any, Final
+from typing import Final
 
 import aiofiles.os
 
 import atr.analysis as analysis
-import atr.hashes as hashes
 import atr.log as log
 import atr.models.results as results
 import atr.tasks.checks as checks
@@ -40,7 +39,7 @@ _ALLOWED_TOP_LEVEL: Final = frozenset(
 )
 # Release policy fields which this check relies on - used for result caching
 INPUT_POLICY_KEYS: Final[list[str]] = []
-INPUT_EXTRA_ARGS: Final[list[str]] = ["is_podling"]
+INPUT_EXTRA_ARGS: Final[list[str]] = ["is_podling", "all_files"]
 
 
 async def check(args: checks.FunctionArguments) -> results.Results | None:
@@ -89,6 +88,11 @@ async def check(args: checks.FunctionArguments) -> 
results.Results | None:
     is_podling = args.extra_args.get("is_podling", False)
     relative_paths = [p async for p in util.paths_recursive(base_path)]
     relative_paths_set = set(str(p) for p in relative_paths)
+
+    await recorder_errors.cache_key_set(INPUT_POLICY_KEYS, INPUT_EXTRA_ARGS, 
checker=checks.function_key(check))
+    await recorder_warnings.cache_key_set(INPUT_POLICY_KEYS, INPUT_EXTRA_ARGS, 
checker=checks.function_key(check))
+    await recorder_success.cache_key_set(INPUT_POLICY_KEYS, INPUT_EXTRA_ARGS, 
checker=checks.function_key(check))
+
     for relative_path in relative_paths:
         # Delegate processing of each path to the helper function
         await _check_path_process_single(
@@ -196,9 +200,6 @@ async def _check_path_process_single(  # noqa: C901
     full_path = base_path / relative_path
     relative_path_str = str(relative_path)
 
-    file_hash = await hashes.compute_file_hash(full_path)
-    inputs_hash_key = {"file_hash": file_hash, "is_podling": is_podling}
-
     # For debugging and testing
     if (await user.is_admin_async(asf_uid)) and (full_path.name == 
"deliberately_slow_ATR_task_filename.txt"):
         await asyncio.sleep(20)
@@ -283,7 +284,6 @@ async def _check_path_process_single(  # noqa: C901
         errors,
         blockers,
         warnings,
-        inputs_hash_key,
     )
 
 
@@ -295,30 +295,16 @@ async def _record(
     errors: list[str],
     blockers: list[str],
     warnings: list[str],
-    inputs_hash_key: dict[str, Any],
 ) -> None:
     for error in errors:
-        hash_key = inputs_hash_key.copy()
-        hash_key["checker"] = recorder_errors.checker
-        inputs_hash = hashes.compute_dict_hash(hash_key)
-        await recorder_errors.failure(error, {}, 
primary_rel_path=relative_path_str, inputs_hash=inputs_hash)
+        await recorder_errors.failure(f"{relative_path_str}: {error}", {}, 
primary_rel_path=relative_path_str)
     for item in blockers:
-        hash_key = inputs_hash_key.copy()
-        hash_key["checker"] = recorder_errors.checker
-        inputs_hash = hashes.compute_dict_hash(hash_key)
-        await recorder_errors.blocker(item, {}, 
primary_rel_path=relative_path_str, inputs_hash=inputs_hash)
+        await recorder_errors.blocker(f"{relative_path_str}: {item}", {}, 
primary_rel_path=relative_path_str)
     for warning in warnings:
-        hash_key = inputs_hash_key.copy()
-        hash_key["checker"] = recorder_warnings.checker
-        inputs_hash = hashes.compute_dict_hash(hash_key)
-        await recorder_warnings.warning(warning, {}, 
primary_rel_path=relative_path_str, inputs_hash=inputs_hash)
+        await recorder_warnings.warning(f"{relative_path_str}: {warning}", {}, 
primary_rel_path=relative_path_str)
     if not (errors or blockers or warnings):
-        hash_key = inputs_hash_key.copy()
-        hash_key["checker"] = recorder_success.checker
-        inputs_hash = hashes.compute_dict_hash(hash_key)
         await recorder_success.success(
-            "Path structure and naming conventions conform to policy",
+            f"{relative_path_str}: Path structure and naming conventions 
conform to policy",
             {},
             primary_rel_path=relative_path_str,
-            inputs_hash=inputs_hash,
         )


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to