This is an automated email from the ASF dual-hosted git repository. arm pushed a commit to branch check_caching in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
commit a96e11aa6dfe6b473dcc3ce577a1beb01d1aaf55 Author: Alastair McFarlane <[email protected]> AuthorDate: Thu Feb 12 15:37:32 2026 +0000 Include release policy in attestation and pull hashes for checks from there. --- atr/attestable.py | 5 +- atr/models/attestable.py | 3 +- atr/storage/readers/checks.py | 10 +- atr/storage/writers/revision.py | 5 +- atr/tasks/__init__.py | 199 +++++++++++++++++++++++++++++++++++++--- atr/tasks/checks/__init__.py | 21 +++-- 6 files changed, 213 insertions(+), 30 deletions(-) diff --git a/atr/attestable.py b/atr/attestable.py index cac950c8..e44be1fe 100644 --- a/atr/attestable.py +++ b/atr/attestable.py @@ -139,12 +139,13 @@ async def write( project_name: str, version_name: str, revision_number: str, + release_policy: dict[str, Any] | None, uploader_uid: str, previous: models.AttestableV1 | None, path_to_hash: dict[str, str], path_to_size: dict[str, int], ) -> None: - result = _generate(path_to_hash, path_to_size, revision_number, uploader_uid, previous) + result = _generate(path_to_hash, path_to_size, revision_number, release_policy, uploader_uid, previous) file_path = attestable_path(project_name, version_name, revision_number) await util.atomic_write_file(file_path, result.model_dump_json(indent=2)) paths_result = models.AttestablePathsV1(paths=result.paths) @@ -191,6 +192,7 @@ def _generate( path_to_hash: dict[str, str], path_to_size: dict[str, int], revision_number: str, + release_policy: dict[str, Any] | None, uploader_uid: str, previous: models.AttestableV1 | None, ) -> models.AttestableV1: @@ -205,4 +207,5 @@ def _generate( return models.AttestableV1( paths=dict(path_to_hash), hashes=dict(new_hashes), + policy=release_policy or {}, ) diff --git a/atr/models/attestable.py b/atr/models/attestable.py index 4bc574bd..45e3ac3d 100644 --- a/atr/models/attestable.py +++ b/atr/models/attestable.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from typing import Annotated, Literal +from typing import Annotated, Any, Literal import pydantic @@ -36,3 +36,4 @@ class AttestableV1(schema.Strict): version: Literal[1] = 1 paths: dict[str, str] = schema.factory(dict) hashes: dict[str, HashEntry] = schema.factory(dict) + policy: dict[str, Any] = schema.factory(dict) diff --git a/atr/storage/readers/checks.py b/atr/storage/readers/checks.py index 5797ba04..8c5c3e2e 100644 --- a/atr/storage/readers/checks.py +++ b/atr/storage/readers/checks.py @@ -22,7 +22,6 @@ import importlib from typing import TYPE_CHECKING import atr.db as db -import atr.file_paths as file_paths import atr.hashing as hashing import atr.models.sql as sql import atr.storage as storage @@ -37,7 +36,7 @@ if TYPE_CHECKING: async def _filter_check_results_by_hash( all_check_results: Sequence[sql.CheckResult], - file_path: pathlib.Path, + rel_path: pathlib.Path, input_hash_by_module: dict[str, str | None], release: sql.Release, ) -> Sequence[sql.CheckResult]: @@ -56,7 +55,7 @@ async def _filter_check_results_by_hash( extra_arg_names = [] extra_args = checks.resolve_extra_args(extra_arg_names, release) cache_key = await checks.resolve_cache_key( - policy_keys, release, release.latest_revision_number, extra_args, path=file_path + policy_keys, release, release.latest_revision_number, extra_args, file=rel_path.name ) input_hash_by_module[module_path] = hashing.compute_dict_hash(cache_key) if cache_key else None @@ -93,16 +92,13 @@ class GeneralPublic: ) all_check_results = await query.all() - file_path = file_paths.revision_path_for_file( - release.project_name, release.version, release.latest_revision_number, rel_path.name - ) # Filter to checks for the current file version / policy # Cache the computed input hash per checker module, since all results here share the same file and release input_hash_by_module: dict[str, str | None] = {} # TODO: This has a bug - create an archive, it'll scan with a hash and show missing checksum. # Then generate a checksum. It'll re-scan the file with the same hash, but now has one. Two checks shown. filtered_check_results = await _filter_check_results_by_hash( - all_check_results, file_path, input_hash_by_module, release + all_check_results, rel_path, input_hash_by_module, release ) # Filter out any results that are ignored diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py index 5371a0b5..cb273606 100644 --- a/atr/storage/writers/revision.py +++ b/atr/storage/writers/revision.py @@ -118,7 +118,7 @@ class CommitteeParticipant(FoundationCommitter): # Get the release release_name = sql.release_name(project_name, version_name) async with db.session() as data: - release = await data.release(name=release_name).demand( + release = await data.release(name=release_name, _release_policy=True, _project_release_policy=True).demand( RuntimeError("Release does not exist for new revision creation") ) old_revision = await interaction.latest_revision(release) @@ -243,10 +243,13 @@ class CommitteeParticipant(FoundationCommitter): await aioshutil.rmtree(temp_dir) raise + policy = release.release_policy or release.project.release_policy + await attestable.write( project_name, version_name, new_revision.number, + policy.model_dump() if policy else None, asf_uid, previous_attestable, path_to_hash, diff --git a/atr/tasks/__init__.py b/atr/tasks/__init__.py index 9eca7a8e..93797bcc 100644 --- a/atr/tasks/__init__.py +++ b/atr/tasks/__init__.py @@ -63,7 +63,13 @@ async def asc_checks( revision, data, signature_path, - {"committee_name": release.committee.name}, + check_cache_key=await checks.resolve_cache_key( + signature.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(signature.INPUT_EXTRA_ARGS, release), + ), + extra_args={"committee_name": release.committee.name}, ) ) @@ -362,7 +368,23 @@ async def sha_checks( """Create hash check task for a .sha256 or .sha512 file.""" tasks = [] - tasks.append(queued(asf_uid, sql.TaskType.HASHING_CHECK, release, revision, data, hash_file)) + tasks.append( + queued( + asf_uid, + sql.TaskType.HASHING_CHECK, + release, + revision, + data, + hash_file, + check_cache_key=await checks.resolve_cache_key( + file_hash.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(file_hash.INPUT_EXTRA_ARGS, release), + file=hash_file, + ), + ) + ) return await asyncio.gather(*tasks) @@ -375,7 +397,21 @@ async def tar_gz_checks( is_podling = (release.project.committee is not None) and release.project.committee.is_podling tasks = [ - queued(asf_uid, sql.TaskType.COMPARE_SOURCE_TREES, release, revision, data, path), + queued( + asf_uid, + sql.TaskType.COMPARE_SOURCE_TREES, + release, + revision, + data, + path, + check_cache_key=await checks.resolve_cache_key( + compare.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release), + file=path, + ), + ), queued( asf_uid, sql.TaskType.LICENSE_FILES, @@ -384,7 +420,11 @@ async def tar_gz_checks( data, path, check_cache_key=await checks.resolve_cache_key( - license.INPUT_POLICY_KEYS, release, revision, {**{"is_podling": is_podling}}, file=path + license.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), + file=path, ), extra_args={"is_podling": is_podling}, ), @@ -395,11 +435,59 @@ async def tar_gz_checks( revision, data, path, - check_cache_key=await checks.resolve_cache_key(license.INPUT_POLICY_KEYS, release, revision, file=path), + check_cache_key=await checks.resolve_cache_key( + license.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), + file=path, + ), + ), + queued( + asf_uid, + sql.TaskType.RAT_CHECK, + release, + revision, + data, + path, + check_cache_key=await checks.resolve_cache_key( + rat.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release), + file=path, + ), + ), + queued( + asf_uid, + sql.TaskType.TARGZ_INTEGRITY, + release, + revision, + data, + path, + check_cache_key=await checks.resolve_cache_key( + targz.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release), + file=path, + ), + ), + queued( + asf_uid, + sql.TaskType.TARGZ_STRUCTURE, + release, + revision, + data, + path, + check_cache_key=await checks.resolve_cache_key( + targz.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release), + file=path, + ), ), - queued(asf_uid, sql.TaskType.RAT_CHECK, release, revision, data, path), - queued(asf_uid, sql.TaskType.TARGZ_INTEGRITY, release, revision, data, path), - queued(asf_uid, sql.TaskType.TARGZ_STRUCTURE, release, revision, data, path), ] return await asyncio.gather(*tasks) @@ -439,14 +527,97 @@ async def zip_checks( # This release has committee, as guaranteed in draft_checks is_podling = (release.project.committee is not None) and release.project.committee.is_podling tasks = [ - queued(asf_uid, sql.TaskType.COMPARE_SOURCE_TREES, release, revision, data, path), queued( - asf_uid, sql.TaskType.LICENSE_FILES, release, revision, data, path, extra_args={"is_podling": is_podling} + asf_uid, + sql.TaskType.COMPARE_SOURCE_TREES, + release, + revision, + data, + path, + check_cache_key=await checks.resolve_cache_key( + compare.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release), + file=path, + ), + ), + queued( + asf_uid, + sql.TaskType.LICENSE_FILES, + release, + revision, + data, + path, + check_cache_key=await checks.resolve_cache_key( + license.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), + file=path, + ), + extra_args={"is_podling": is_podling}, + ), + queued( + asf_uid, + sql.TaskType.LICENSE_HEADERS, + release, + revision, + data, + path, + check_cache_key=await checks.resolve_cache_key( + license.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release), + file=path, + ), + ), + queued( + asf_uid, + sql.TaskType.RAT_CHECK, + release, + revision, + data, + path, + check_cache_key=await checks.resolve_cache_key( + rat.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release), + file=path, + ), + ), + queued( + asf_uid, + sql.TaskType.TARGZ_INTEGRITY, + release, + revision, + data, + path, + check_cache_key=await checks.resolve_cache_key( + zipformat.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release), + file=path, + ), + ), + queued( + asf_uid, + sql.TaskType.TARGZ_STRUCTURE, + release, + revision, + data, + path, + check_cache_key=await checks.resolve_cache_key( + zipformat.INPUT_POLICY_KEYS, + release, + revision, + checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release), + file=path, + ), ), - queued(asf_uid, sql.TaskType.LICENSE_HEADERS, release, revision, data, path), - queued(asf_uid, sql.TaskType.RAT_CHECK, release, revision, data, path), - queued(asf_uid, sql.TaskType.ZIPFORMAT_INTEGRITY, release, revision, data, path), - queued(asf_uid, sql.TaskType.ZIPFORMAT_STRUCTURE, release, revision, data, path), ] return await asyncio.gather(*tasks) diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py index 1b3c1760..a6494cae 100644 --- a/atr/tasks/checks/__init__.py +++ b/atr/tasks/checks/__init__.py @@ -32,6 +32,7 @@ if TYPE_CHECKING: import atr.models.schema as schema +import atr.attestable as attestable import atr.config as config import atr.db as db import atr.file_paths as file_paths @@ -217,7 +218,9 @@ class Recorder: release = await data.release( name=self.release_name, _release_policy=True, _project_release_policy=True ).demand(RuntimeError(f"Release {self.release_name} not found")) - cache_key = await resolve_cache_key(policy_keys, release, self.revision_number, input_args, path=path) + cache_key = await resolve_cache_key( + policy_keys, release, self.revision_number, input_args, file=self.primary_rel_path + ) self.__input_hash = hashing.compute_dict_hash(cache_key) if cache_key else None return True @@ -355,13 +358,19 @@ async def resolve_cache_key( raise ValueError("Must specify either file or path") if not args: args = {} - if path is None: - # We know file isn't None here but type checker doesn't - path = file_paths.revision_path_for_file(release.project_name, release.version, revision, file or "") - file_hash = await hashing.compute_file_hash(path) + attestable_data = await attestable.load(release.project_name, release.version, revision) + if attestable_data: + policy = sql.ReleasePolicy.model_validate(attestable_data.policy) + file_hash = attestable_data.paths[file or ""] + else: + # TODO: Is this fallback valid / necessary? Or should we bail out if there's no attestable data? + policy = release.release_policy or release.project.release_policy + if path is None: + # We know file isn't None here but type checker doesn't + path = file_paths.revision_path_for_file(release.project_name, release.version, revision, file or "") + file_hash = await hashing.compute_file_hash(path) cache_key = {"file_hash": file_hash} - policy = release.release_policy or release.project.release_policy if len(policy_keys) > 0 and policy is not None: policy_dict = policy.model_dump(exclude_none=True) return {**cache_key, **args, **{k: policy_dict[k] for k in policy_keys if k in policy_dict}} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
