This is an automated email from the ASF dual-hosted git repository.
arm pushed a commit to branch check_caching
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/check_caching by this push:
new 7782b70 Include release policy in attestation and pull hashes for
checks from there.
7782b70 is described below
commit 7782b70a0d3d2fedb3a82a80f46c0d7cd71cf0a6
Author: Alastair McFarlane <[email protected]>
AuthorDate: Thu Feb 12 15:37:32 2026 +0000
Include release policy in attestation and pull hashes for checks from there.
---
atr/attestable.py | 5 +-
atr/models/attestable.py | 3 +-
atr/storage/readers/checks.py | 10 +-
atr/storage/writers/revision.py | 5 +-
atr/tasks/__init__.py | 199 +++++++++++++++++++++++++++++++++++++---
atr/tasks/checks/__init__.py | 21 +++--
6 files changed, 213 insertions(+), 30 deletions(-)
diff --git a/atr/attestable.py b/atr/attestable.py
index cac950c..e44be1f 100644
--- a/atr/attestable.py
+++ b/atr/attestable.py
@@ -139,12 +139,13 @@ async def write(
project_name: str,
version_name: str,
revision_number: str,
+ release_policy: dict[str, Any] | None,
uploader_uid: str,
previous: models.AttestableV1 | None,
path_to_hash: dict[str, str],
path_to_size: dict[str, int],
) -> None:
- result = _generate(path_to_hash, path_to_size, revision_number,
uploader_uid, previous)
+ result = _generate(path_to_hash, path_to_size, revision_number,
release_policy, uploader_uid, previous)
file_path = attestable_path(project_name, version_name, revision_number)
await util.atomic_write_file(file_path, result.model_dump_json(indent=2))
paths_result = models.AttestablePathsV1(paths=result.paths)
@@ -191,6 +192,7 @@ def _generate(
path_to_hash: dict[str, str],
path_to_size: dict[str, int],
revision_number: str,
+ release_policy: dict[str, Any] | None,
uploader_uid: str,
previous: models.AttestableV1 | None,
) -> models.AttestableV1:
@@ -205,4 +207,5 @@ def _generate(
return models.AttestableV1(
paths=dict(path_to_hash),
hashes=dict(new_hashes),
+ policy=release_policy or {},
)
diff --git a/atr/models/attestable.py b/atr/models/attestable.py
index 4bc574b..45e3ac3 100644
--- a/atr/models/attestable.py
+++ b/atr/models/attestable.py
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-from typing import Annotated, Literal
+from typing import Annotated, Any, Literal
import pydantic
@@ -36,3 +36,4 @@ class AttestableV1(schema.Strict):
version: Literal[1] = 1
paths: dict[str, str] = schema.factory(dict)
hashes: dict[str, HashEntry] = schema.factory(dict)
+ policy: dict[str, Any] = schema.factory(dict)
diff --git a/atr/storage/readers/checks.py b/atr/storage/readers/checks.py
index 5797ba0..8c5c3e2 100644
--- a/atr/storage/readers/checks.py
+++ b/atr/storage/readers/checks.py
@@ -22,7 +22,6 @@ import importlib
from typing import TYPE_CHECKING
import atr.db as db
-import atr.file_paths as file_paths
import atr.hashing as hashing
import atr.models.sql as sql
import atr.storage as storage
@@ -37,7 +36,7 @@ if TYPE_CHECKING:
async def _filter_check_results_by_hash(
all_check_results: Sequence[sql.CheckResult],
- file_path: pathlib.Path,
+ rel_path: pathlib.Path,
input_hash_by_module: dict[str, str | None],
release: sql.Release,
) -> Sequence[sql.CheckResult]:
@@ -56,7 +55,7 @@ async def _filter_check_results_by_hash(
extra_arg_names = []
extra_args = checks.resolve_extra_args(extra_arg_names, release)
cache_key = await checks.resolve_cache_key(
- policy_keys, release, release.latest_revision_number,
extra_args, path=file_path
+ policy_keys, release, release.latest_revision_number,
extra_args, file=rel_path.name
)
input_hash_by_module[module_path] =
hashing.compute_dict_hash(cache_key) if cache_key else None
@@ -93,16 +92,13 @@ class GeneralPublic:
)
all_check_results = await query.all()
- file_path = file_paths.revision_path_for_file(
- release.project_name, release.version,
release.latest_revision_number, rel_path.name
- )
# Filter to checks for the current file version / policy
# Cache the computed input hash per checker module, since all results
here share the same file and release
input_hash_by_module: dict[str, str | None] = {}
# TODO: This has a bug - create an archive, it'll scan with a hash and
show missing checksum.
# Then generate a checksum. It'll re-scan the file with the same hash,
but now has one. Two checks shown.
filtered_check_results = await _filter_check_results_by_hash(
- all_check_results, file_path, input_hash_by_module, release
+ all_check_results, rel_path, input_hash_by_module, release
)
# Filter out any results that are ignored
diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py
index 5371a0b..cb27360 100644
--- a/atr/storage/writers/revision.py
+++ b/atr/storage/writers/revision.py
@@ -118,7 +118,7 @@ class CommitteeParticipant(FoundationCommitter):
# Get the release
release_name = sql.release_name(project_name, version_name)
async with db.session() as data:
- release = await data.release(name=release_name).demand(
+ release = await data.release(name=release_name,
_release_policy=True, _project_release_policy=True).demand(
RuntimeError("Release does not exist for new revision
creation")
)
old_revision = await interaction.latest_revision(release)
@@ -243,10 +243,13 @@ class CommitteeParticipant(FoundationCommitter):
await aioshutil.rmtree(temp_dir)
raise
+ policy = release.release_policy or release.project.release_policy
+
await attestable.write(
project_name,
version_name,
new_revision.number,
+ policy.model_dump() if policy else None,
asf_uid,
previous_attestable,
path_to_hash,
diff --git a/atr/tasks/__init__.py b/atr/tasks/__init__.py
index 9eca7a8..93797bc 100644
--- a/atr/tasks/__init__.py
+++ b/atr/tasks/__init__.py
@@ -63,7 +63,13 @@ async def asc_checks(
revision,
data,
signature_path,
- {"committee_name": release.committee.name},
+ check_cache_key=await checks.resolve_cache_key(
+ signature.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(signature.INPUT_EXTRA_ARGS,
release),
+ ),
+ extra_args={"committee_name": release.committee.name},
)
)
@@ -362,7 +368,23 @@ async def sha_checks(
"""Create hash check task for a .sha256 or .sha512 file."""
tasks = []
- tasks.append(queued(asf_uid, sql.TaskType.HASHING_CHECK, release,
revision, data, hash_file))
+ tasks.append(
+ queued(
+ asf_uid,
+ sql.TaskType.HASHING_CHECK,
+ release,
+ revision,
+ data,
+ hash_file,
+ check_cache_key=await checks.resolve_cache_key(
+ file_hash.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(file_hash.INPUT_EXTRA_ARGS, release),
+ file=hash_file,
+ ),
+ )
+ )
return await asyncio.gather(*tasks)
@@ -375,7 +397,21 @@ async def tar_gz_checks(
is_podling = (release.project.committee is not None) and
release.project.committee.is_podling
tasks = [
- queued(asf_uid, sql.TaskType.COMPARE_SOURCE_TREES, release, revision,
data, path),
+ queued(
+ asf_uid,
+ sql.TaskType.COMPARE_SOURCE_TREES,
+ release,
+ revision,
+ data,
+ path,
+ check_cache_key=await checks.resolve_cache_key(
+ compare.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release),
+ file=path,
+ ),
+ ),
queued(
asf_uid,
sql.TaskType.LICENSE_FILES,
@@ -384,7 +420,11 @@ async def tar_gz_checks(
data,
path,
check_cache_key=await checks.resolve_cache_key(
- license.INPUT_POLICY_KEYS, release, revision,
{**{"is_podling": is_podling}}, file=path
+ license.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+ file=path,
),
extra_args={"is_podling": is_podling},
),
@@ -395,11 +435,59 @@ async def tar_gz_checks(
revision,
data,
path,
- check_cache_key=await
checks.resolve_cache_key(license.INPUT_POLICY_KEYS, release, revision,
file=path),
+ check_cache_key=await checks.resolve_cache_key(
+ license.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+ file=path,
+ ),
+ ),
+ queued(
+ asf_uid,
+ sql.TaskType.RAT_CHECK,
+ release,
+ revision,
+ data,
+ path,
+ check_cache_key=await checks.resolve_cache_key(
+ rat.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
+ file=path,
+ ),
+ ),
+ queued(
+ asf_uid,
+ sql.TaskType.TARGZ_INTEGRITY,
+ release,
+ revision,
+ data,
+ path,
+ check_cache_key=await checks.resolve_cache_key(
+ targz.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
+ file=path,
+ ),
+ ),
+ queued(
+ asf_uid,
+ sql.TaskType.TARGZ_STRUCTURE,
+ release,
+ revision,
+ data,
+ path,
+ check_cache_key=await checks.resolve_cache_key(
+ targz.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
+ file=path,
+ ),
),
- queued(asf_uid, sql.TaskType.RAT_CHECK, release, revision, data, path),
- queued(asf_uid, sql.TaskType.TARGZ_INTEGRITY, release, revision, data,
path),
- queued(asf_uid, sql.TaskType.TARGZ_STRUCTURE, release, revision, data,
path),
]
return await asyncio.gather(*tasks)
@@ -439,14 +527,97 @@ async def zip_checks(
# This release has committee, as guaranteed in draft_checks
is_podling = (release.project.committee is not None) and
release.project.committee.is_podling
tasks = [
- queued(asf_uid, sql.TaskType.COMPARE_SOURCE_TREES, release, revision,
data, path),
queued(
- asf_uid, sql.TaskType.LICENSE_FILES, release, revision, data,
path, extra_args={"is_podling": is_podling}
+ asf_uid,
+ sql.TaskType.COMPARE_SOURCE_TREES,
+ release,
+ revision,
+ data,
+ path,
+ check_cache_key=await checks.resolve_cache_key(
+ compare.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release),
+ file=path,
+ ),
+ ),
+ queued(
+ asf_uid,
+ sql.TaskType.LICENSE_FILES,
+ release,
+ revision,
+ data,
+ path,
+ check_cache_key=await checks.resolve_cache_key(
+ license.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+ file=path,
+ ),
+ extra_args={"is_podling": is_podling},
+ ),
+ queued(
+ asf_uid,
+ sql.TaskType.LICENSE_HEADERS,
+ release,
+ revision,
+ data,
+ path,
+ check_cache_key=await checks.resolve_cache_key(
+ license.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+ file=path,
+ ),
+ ),
+ queued(
+ asf_uid,
+ sql.TaskType.RAT_CHECK,
+ release,
+ revision,
+ data,
+ path,
+ check_cache_key=await checks.resolve_cache_key(
+ rat.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
+ file=path,
+ ),
+ ),
+ queued(
+ asf_uid,
+ sql.TaskType.ZIPFORMAT_INTEGRITY,
+ release,
+ revision,
+ data,
+ path,
+ check_cache_key=await checks.resolve_cache_key(
+ zipformat.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
+ file=path,
+ ),
+ ),
+ queued(
+ asf_uid,
+ sql.TaskType.ZIPFORMAT_STRUCTURE,
+ release,
+ revision,
+ data,
+ path,
+ check_cache_key=await checks.resolve_cache_key(
+ zipformat.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
+ file=path,
+ ),
),
- queued(asf_uid, sql.TaskType.LICENSE_HEADERS, release, revision, data,
path),
- queued(asf_uid, sql.TaskType.RAT_CHECK, release, revision, data, path),
- queued(asf_uid, sql.TaskType.ZIPFORMAT_INTEGRITY, release, revision,
data, path),
- queued(asf_uid, sql.TaskType.ZIPFORMAT_STRUCTURE, release, revision,
data, path),
]
return await asyncio.gather(*tasks)
diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py
index 0957801..f340979 100644
--- a/atr/tasks/checks/__init__.py
+++ b/atr/tasks/checks/__init__.py
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
import atr.models.schema as schema
+import atr.attestable as attestable
import atr.config as config
import atr.db as db
import atr.file_paths as file_paths
@@ -221,7 +222,9 @@ class Recorder:
release = await data.release(
name=self.release_name, _release_policy=True,
_project_release_policy=True
).demand(RuntimeError(f"Release {self.release_name} not found"))
- cache_key = await resolve_cache_key(policy_keys, release,
self.revision_number, input_args, path=path)
+ cache_key = await resolve_cache_key(
+ policy_keys, release, self.revision_number, input_args,
file=self.primary_rel_path
+ )
self.__input_hash = hashing.compute_dict_hash(cache_key) if
cache_key else None
return True
@@ -359,13 +362,19 @@ async def resolve_cache_key(
raise ValueError("Must specify either file or path")
if not args:
args = {}
- if path is None:
- # We know file isn't None here but type checker doesn't
- path = file_paths.revision_path_for_file(release.project_name,
release.version, revision, file or "")
- file_hash = await hashing.compute_file_hash(path)
+ attestable_data = await attestable.load(release.project_name,
release.version, revision)
+ if attestable_data:
+ policy = sql.ReleasePolicy.model_validate(attestable_data.policy)
+ file_hash = attestable_data.paths[file or ""]
+ else:
+ # TODO: Is this fallback valid / necessary? Or should we bail out if
there's no attestable data?
+ policy = release.release_policy or release.project.release_policy
+ if path is None:
+ # We know file isn't None here but type checker doesn't
+ path = file_paths.revision_path_for_file(release.project_name,
release.version, revision, file or "")
+ file_hash = await hashing.compute_file_hash(path)
cache_key = {"file_hash": file_hash}
- policy = release.release_policy or release.project.release_policy
if len(policy_keys) > 0 and policy is not None:
policy_dict = policy.model_dump(exclude_none=True)
return {**cache_key, **args, **{k: policy_dict[k] for k in policy_keys
if k in policy_dict}}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]