This is an automated email from the ASF dual-hosted git repository. arm pushed a commit to branch check_caching in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
commit 3161e7cb02389828fbd34b2d95e772df7c0bd716 Author: Alastair McFarlane <[email protected]> AuthorDate: Fri Feb 13 18:06:54 2026 +0000 Read and write checks to/from attestable data --- atr/storage/readers/checks.py | 13 ++------ atr/tasks/__init__.py | 43 +++++++++++++++----------- atr/tasks/checks/__init__.py | 70 ++++++++++++++++++++++++------------------- atr/tasks/checks/paths.py | 36 +++++++--------------- 4 files changed, 79 insertions(+), 83 deletions(-) diff --git a/atr/storage/readers/checks.py b/atr/storage/readers/checks.py index 4edbb7ce..1a95df2c 100644 --- a/atr/storage/readers/checks.py +++ b/atr/storage/readers/checks.py @@ -54,7 +54,7 @@ async def _filter_check_results_by_hash( except (ImportError, AttributeError): policy_keys = [] extra_arg_names = [] - extra_args = checks.resolve_extra_args(extra_arg_names, release) + extra_args = await checks.resolve_extra_args(extra_arg_names, release) cache_key = await checks.resolve_cache_key( cr.checker, policy_keys, release, release.latest_revision_number, extra_args, file=rel_path.name ) @@ -97,20 +97,11 @@ class GeneralPublic: else [] ) - # Filter to checks for the current file version / policy - # Cache the computed input hash per checker module, since all results here share the same file and release - input_hash_by_module: dict[str, str | None] = {} - # TODO: This has a bug - create an archive, it'll scan with a hash and show missing checksum. - # Then generate a checksum. It'll re-scan the file with the same hash, but now has one. Two checks shown. - filtered_check_results = await _filter_check_results_by_hash( - all_check_results, rel_path, input_hash_by_module, release - ) - # Filter out any results that are ignored unignored_checks = [] ignored_checks = [] match_ignore = await self.ignores_matcher(release.project_name) - for cr in filtered_check_results: + for cr in all_check_results: if not match_ignore(cr): unignored_checks.append(cr) else: diff --git a/atr/tasks/__init__.py b/atr/tasks/__init__.py index cb234a31..00beb6d7 100644 --- a/atr/tasks/__init__.py +++ b/atr/tasks/__init__.py @@ -69,7 +69,7 @@ async def asc_checks( signature.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(signature.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(signature.INPUT_EXTRA_ARGS, release), ), extra_args={"committee_name": release.committee.name}, ) @@ -177,11 +177,20 @@ async def draft_checks( release, revision_number, caller_data, + check_cache_key=await checks.resolve_cache_key( + resolve(sql.TaskType.PATHS_CHECK), + paths.INPUT_POLICY_KEYS, + release, + revision_number, + await checks.resolve_extra_args(paths.INPUT_EXTRA_ARGS, release), + ignore_path=True, + ), extra_args={"is_podling": is_podling}, ) - data.add(path_check_task) - if caller_data is None: - await data.commit() + if path_check_task: + data.add(path_check_task) + if caller_data is None: + await data.commit() return len(relative_paths) @@ -386,7 +395,7 @@ async def sha_checks( hashing.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(hashing.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(hashing.INPUT_EXTRA_ARGS, release), file=hash_file, ), ) @@ -406,7 +415,7 @@ async def tar_gz_checks( compare.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release), file=path, ) license_h_ck = await checks.resolve_cache_key( @@ -414,7 +423,7 @@ async def tar_gz_checks( license.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), file=path, ) license_f_ck = await checks.resolve_cache_key( @@ -422,7 +431,7 @@ async def tar_gz_checks( license.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), file=path, ) rat_ck = await checks.resolve_cache_key( @@ -430,7 +439,7 @@ async def tar_gz_checks( rat.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release), file=path, ) targz_i_ck = await checks.resolve_cache_key( @@ -438,7 +447,7 @@ async def tar_gz_checks( targz.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release), file=path, ) targz_s_ck = await checks.resolve_cache_key( @@ -446,7 +455,7 @@ async def tar_gz_checks( targz.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release), file=path, ) tasks = [ @@ -508,7 +517,7 @@ async def zip_checks( compare.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release), file=path, ) license_h_ck = await checks.resolve_cache_key( @@ -516,7 +525,7 @@ async def zip_checks( license.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), file=path, ) license_f_ck = await checks.resolve_cache_key( @@ -524,7 +533,7 @@ async def zip_checks( license.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), file=path, ) rat_ck = await checks.resolve_cache_key( @@ -532,7 +541,7 @@ async def zip_checks( rat.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release), file=path, ) zip_i_ck = await checks.resolve_cache_key( @@ -540,7 +549,7 @@ async def zip_checks( zipformat.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release), file=path, ) zip_s_ck = await checks.resolve_cache_key( @@ -548,7 +557,7 @@ async def zip_checks( zipformat.INPUT_POLICY_KEYS, release, revision, - checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release), + await checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release), file=path, ) diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py index 2f722b68..9b44c70d 100644 --- a/atr/tasks/checks/__init__.py +++ b/atr/tasks/checks/__init__.py @@ -118,7 +118,6 @@ class Recorder: data: Any, primary_rel_path: str | None = None, member_rel_path: str | None = None, - inputs_hash: str | None = None, ) -> sql.CheckResult: if self.constructed is False: raise RuntimeError("Cannot add check result to a recorder that has not been constructed") @@ -144,7 +143,7 @@ class Recorder: message=message, data=data, cached=False, - inputs_hash=inputs_hash or self.__input_hash, + inputs_hash=self.input_hash, ) # It would be more efficient to keep a session open @@ -202,11 +201,10 @@ class Recorder: abs_path = await self.abs_path() return matches(str(abs_path)) - async def cache_key_set(self, policy_keys: list[str], input_args: list[str] | None = None) -> bool: + async def cache_key_set( + self, policy_keys: list[str], input_args: list[str] | None = None, checker: str | None = None + ) -> bool: # TODO: Should this just be in the constructor? - path = await self.abs_path() - if (not path) or (not await aiofiles.os.path.isfile(path)): - return False if config.get().DISABLE_CHECK_CACHE: return False @@ -222,9 +220,9 @@ class Recorder: release = await data.release( name=self.release_name, _release_policy=True, _project_release_policy=True, _project=True ).demand(RuntimeError(f"Release {self.release_name} not found")) - args = resolve_extra_args(input_args or [], release) + args = await resolve_extra_args(input_args or [], release) cache_key = await resolve_cache_key( - self.checker, policy_keys, release, self.revision_number, args, file=self.primary_rel_path + checker or self.checker, policy_keys, release, self.revision_number, args, file=self.primary_rel_path ) self.__input_hash = hashes.compute_dict_hash(cache_key) if cache_key else None return True @@ -255,7 +253,6 @@ class Recorder: data: Any, primary_rel_path: str | None = None, member_rel_path: str | None = None, - inputs_hash: str | None = None, ) -> sql.CheckResult: result = await self._add( sql.CheckResultStatus.BLOCKER, @@ -263,7 +260,6 @@ class Recorder: data, primary_rel_path=primary_rel_path, member_rel_path=member_rel_path, - inputs_hash=inputs_hash, ) await attestable.write_checks_data(self.project_name, self.version_name, self.revision_number, [result.id]) return result @@ -274,7 +270,6 @@ class Recorder: data: Any, primary_rel_path: str | None = None, member_rel_path: str | None = None, - inputs_hash: str | None = None, ) -> sql.CheckResult: result = await self._add( sql.CheckResultStatus.EXCEPTION, @@ -282,7 +277,6 @@ class Recorder: data, primary_rel_path=primary_rel_path, member_rel_path=member_rel_path, - inputs_hash=inputs_hash, ) await attestable.write_checks_data(self.project_name, self.version_name, self.revision_number, [result.id]) return result @@ -293,7 +287,6 @@ class Recorder: data: Any, primary_rel_path: str | None = None, member_rel_path: str | None = None, - inputs_hash: str | None = None, ) -> sql.CheckResult: result = await self._add( sql.CheckResultStatus.FAILURE, @@ -301,7 +294,6 @@ class Recorder: data, primary_rel_path=primary_rel_path, member_rel_path=member_rel_path, - inputs_hash=inputs_hash, ) await attestable.write_checks_data(self.project_name, self.version_name, self.revision_number, [result.id]) return result @@ -312,7 +304,6 @@ class Recorder: data: Any, primary_rel_path: str | None = None, member_rel_path: str | None = None, - inputs_hash: str | None = None, ) -> sql.CheckResult: result = await self._add( sql.CheckResultStatus.SUCCESS, @@ -320,7 +311,6 @@ class Recorder: data, primary_rel_path=primary_rel_path, member_rel_path=member_rel_path, - inputs_hash=inputs_hash, ) await attestable.write_checks_data(self.project_name, self.version_name, self.revision_number, [result.id]) return result @@ -343,7 +333,6 @@ class Recorder: data: Any, primary_rel_path: str | None = None, member_rel_path: str | None = None, - inputs_hash: str | None = None, ) -> sql.CheckResult: result = await self._add( sql.CheckResultStatus.WARNING, @@ -351,7 +340,6 @@ class Recorder: data, primary_rel_path=primary_rel_path, member_rel_path=member_rel_path, - inputs_hash=inputs_hash, ) await attestable.write_checks_data(self.project_name, self.version_name, self.revision_number, [result.id]) return result @@ -369,23 +357,26 @@ async def resolve_cache_key( args: dict[str, Any] | None = None, file: str | None = None, path: pathlib.Path | None = None, + ignore_path: bool = False, ) -> dict[str, Any] | None: - if file is None and path is None: - raise ValueError("Must specify either file or path") if not args: args = {} + cache_key = {"checker": function_key(checker)} + file_hash = None attestable_data = await attestable.load(release.project_name, release.version, revision) if attestable_data: policy = sql.ReleasePolicy.model_validate(attestable_data.policy) - file_hash = attestable_data.paths[file or ""] + if not ignore_path: + file_hash = attestable_data.paths.get(file) if file else None else: # TODO: Is this fallback valid / necessary? Or should we bail out if there's no attestable data? policy = release.release_policy or release.project.release_policy - if path is None: - # We know file isn't None here but type checker doesn't - path = file_paths.revision_path_for_file(release.project_name, release.version, revision, file or "") - file_hash = await hashes.compute_file_hash(path) - cache_key = {"file_hash": file_hash, "checker": function_key(checker)} + if not ignore_path: + if path is None: + path = file_paths.revision_path_for_file(release.project_name, release.version, revision, file or "") + file_hash = await hashes.compute_file_hash(path) + if file_hash: + cache_key["file_hash"] = file_hash if len(policy_keys) > 0 and policy is not None: policy_dict = policy.model_dump(exclude_none=True) @@ -394,7 +385,7 @@ async def resolve_cache_key( return {**cache_key, **args} -def resolve_extra_args(arg_names: list[str], release: sql.Release) -> dict[str, Any]: +async def resolve_extra_args(arg_names: list[str], release: sql.Release) -> dict[str, Any]: result: dict[str, Any] = {} for name in arg_names: resolver = _EXTRA_ARG_RESOLVERS.get(name, None) @@ -402,7 +393,7 @@ def resolve_extra_args(arg_names: list[str], release: sql.Release) -> dict[str, if resolver is None: log.warning(f"Unknown extra arg resolver: {name}") return {} - result[name] = resolver(release) + result[name] = await resolver(release) return result @@ -420,17 +411,36 @@ def with_model(cls: type[schema.Strict]) -> Callable[[Callable[..., Any]], Calla return decorator -def _resolve_is_podling(release: sql.Release) -> bool: +async def _resolve_all_files(release: sql.Release) -> list[str]: + if not release.latest_revision_number: + return [] + if not ( + base_path := file_paths.base_path_for_revision( + release.project_name, release.version, release.latest_revision_number + ) + ): + return [] + + if not await aiofiles.os.path.isdir(base_path): + log.error(f"Base release directory does not exist or is not a directory: {base_path}") + return [] + relative_paths = [p async for p in util.paths_recursive(base_path)] + relative_paths_set = set(str(p) for p in relative_paths) + return list(relative_paths_set) + + +async def _resolve_is_podling(release: sql.Release) -> bool: return (release.committee is not None) and release.committee.is_podling -def _resolve_committee_name(release: sql.Release) -> str: +async def _resolve_committee_name(release: sql.Release) -> str: if release.committee is None: raise ValueError("Release has no committee") return release.committee.name _EXTRA_ARG_RESOLVERS: Final[dict[str, Callable[[sql.Release], Any]]] = { + "all_files": _resolve_all_files, "is_podling": _resolve_is_podling, "committee_name": _resolve_committee_name, } diff --git a/atr/tasks/checks/paths.py b/atr/tasks/checks/paths.py index 98423b18..61a5e527 100644 --- a/atr/tasks/checks/paths.py +++ b/atr/tasks/checks/paths.py @@ -18,12 +18,11 @@ import asyncio import pathlib import re -from typing import Any, Final +from typing import Final import aiofiles.os import atr.analysis as analysis -import atr.hashes as hashes import atr.log as log import atr.models.results as results import atr.tasks.checks as checks @@ -40,7 +39,7 @@ _ALLOWED_TOP_LEVEL: Final = frozenset( ) # Release policy fields which this check relies on - used for result caching INPUT_POLICY_KEYS: Final[list[str]] = [] -INPUT_EXTRA_ARGS: Final[list[str]] = ["is_podling"] +INPUT_EXTRA_ARGS: Final[list[str]] = ["is_podling", "all_files"] async def check(args: checks.FunctionArguments) -> results.Results | None: @@ -89,6 +88,11 @@ async def check(args: checks.FunctionArguments) -> results.Results | None: is_podling = args.extra_args.get("is_podling", False) relative_paths = [p async for p in util.paths_recursive(base_path)] relative_paths_set = set(str(p) for p in relative_paths) + + await recorder_errors.cache_key_set(INPUT_POLICY_KEYS, INPUT_EXTRA_ARGS, checker=checks.function_key(check)) + await recorder_warnings.cache_key_set(INPUT_POLICY_KEYS, INPUT_EXTRA_ARGS, checker=checks.function_key(check)) + await recorder_success.cache_key_set(INPUT_POLICY_KEYS, INPUT_EXTRA_ARGS, checker=checks.function_key(check)) + for relative_path in relative_paths: # Delegate processing of each path to the helper function await _check_path_process_single( @@ -188,9 +192,6 @@ async def _check_path_process_single( full_path = base_path / relative_path relative_path_str = str(relative_path) - file_hash = await hashes.compute_file_hash(full_path) - inputs_hash_key = {"file_hash": file_hash, "is_podling": is_podling} - # For debugging and testing if (await user.is_admin_async(asf_uid)) and (full_path.name == "deliberately_slow_ATR_task_filename.txt"): await asyncio.sleep(20) @@ -236,7 +237,6 @@ async def _check_path_process_single( errors, blockers, warnings, - inputs_hash_key, ) @@ -248,30 +248,16 @@ async def _record( errors: list[str], blockers: list[str], warnings: list[str], - inputs_hash_key: dict[str, Any], ) -> None: for error in errors: - hash_key = inputs_hash_key.copy() - hash_key["checker"] = recorder_errors.checker - inputs_hash = hashes.compute_dict_hash(hash_key) - await recorder_errors.failure(error, {}, primary_rel_path=relative_path_str, inputs_hash=inputs_hash) + await recorder_errors.failure(f"{relative_path_str}: {error}", {}, primary_rel_path=relative_path_str) for item in blockers: - hash_key = inputs_hash_key.copy() - hash_key["checker"] = recorder_errors.checker - inputs_hash = hashes.compute_dict_hash(hash_key) - await recorder_errors.blocker(item, {}, primary_rel_path=relative_path_str, inputs_hash=inputs_hash) + await recorder_errors.blocker(f"{relative_path_str}: {item}", {}, primary_rel_path=relative_path_str) for warning in warnings: - hash_key = inputs_hash_key.copy() - hash_key["checker"] = recorder_warnings.checker - inputs_hash = hashes.compute_dict_hash(hash_key) - await recorder_warnings.warning(warning, {}, primary_rel_path=relative_path_str, inputs_hash=inputs_hash) + await recorder_warnings.warning(f"{relative_path_str}: {warning}", {}, primary_rel_path=relative_path_str) if not (errors or blockers or warnings): - hash_key = inputs_hash_key.copy() - hash_key["checker"] = recorder_success.checker - inputs_hash = hashes.compute_dict_hash(hash_key) await recorder_success.success( - "Path structure and naming conventions conform to policy", + f"{relative_path_str}: Path structure and naming conventions conform to policy", {}, primary_rel_path=relative_path_str, - inputs_hash=inputs_hash, ) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
