This is an automated email from the ASF dual-hosted git repository.
arm pushed a commit to branch check_caching
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/check_caching by this push:
new 829b0ba Include checker name in cache key and tidy up some code.
829b0ba is described below
commit 829b0bab68b28d58c27421741c38dd4732a83318
Author: Alastair McFarlane <[email protected]>
AuthorDate: Thu Feb 12 17:31:16 2026 +0000
Include checker name in cache key and tidy up some code.
---
atr/attestable.py | 4 +-
atr/docs/tasks.md | 2 +-
atr/{hashing.py => hashes.py} | 0
atr/merge.py | 7 +-
atr/storage/readers/checks.py | 6 +-
atr/tasks/__init__.py | 288 +++++++++++---------------
atr/tasks/checks/__init__.py | 17 +-
atr/tasks/checks/{file_hash.py => hashing.py} | 0
atr/tasks/checks/paths.py | 6 +-
9 files changed, 139 insertions(+), 191 deletions(-)
diff --git a/atr/attestable.py b/atr/attestable.py
index e44be1f..b251227 100644
--- a/atr/attestable.py
+++ b/atr/attestable.py
@@ -24,7 +24,7 @@ import aiofiles
import aiofiles.os
import pydantic
-import atr.hashing as hashing
+import atr.hashes as hashes
import atr.log as log
import atr.models.attestable as models
import atr.util as util
@@ -130,7 +130,7 @@ async def paths_to_hashes_and_sizes(directory:
pathlib.Path) -> tuple[dict[str,
if "\\" in path_key:
# TODO: We should centralise this, and forbid some other
characters too
raise ValueError(f"Backslash in path is forbidden: {path_key}")
- path_to_hash[path_key] = await hashing.compute_file_hash(full_path)
+ path_to_hash[path_key] = await hashes.compute_file_hash(full_path)
path_to_size[path_key] = (await aiofiles.os.stat(full_path)).st_size
return path_to_hash, path_to_size
diff --git a/atr/docs/tasks.md b/atr/docs/tasks.md
index 43c8ae2..8761a25 100644
--- a/atr/docs/tasks.md
+++ b/atr/docs/tasks.md
@@ -41,7 +41,7 @@ In `atr/tasks/checks` you will find several modules that
perform these check tas
In `atr/tasks/__init__.py` you will see imports for existing modules where you
can add an import for new check task, for example:
```python
-import atr.tasks.checks.file_hash as file_hash
+import atr.tasks.checks.hashing as file_hash
import atr.tasks.checks.license as license
```
diff --git a/atr/hashing.py b/atr/hashes.py
similarity index 100%
rename from atr/hashing.py
rename to atr/hashes.py
diff --git a/atr/merge.py b/atr/merge.py
index 5ea0fe6..68793eb 100644
--- a/atr/merge.py
+++ b/atr/merge.py
@@ -24,6 +24,7 @@ from typing import TYPE_CHECKING
import aiofiles.os
import atr.attestable as attestable
+import atr.hashes as hashes
import atr.util as util
if TYPE_CHECKING:
@@ -131,7 +132,7 @@ async def _add_from_prior(
if (prior_hashes is not None) and (path in prior_hashes):
n_hashes[path] = prior_hashes[path]
else:
- n_hashes[path] = await attestable.compute_file_hash(target)
+ n_hashes[path] = await hashes.compute_file_hash(target)
stat_result = await aiofiles.os.stat(target)
n_sizes[path] = stat_result.st_size
return prior_hashes
@@ -210,7 +211,7 @@ async def _merge_all_present(
if (prior_hashes is not None) and (path in prior_hashes):
p_hash = prior_hashes[path]
else:
- p_hash = await attestable.compute_file_hash(prior_dir / path)
+ p_hash = await hashes.compute_file_hash(prior_dir / path)
if p_hash != b_hash:
# Case 11 via hash: base and new have the same content but prior
differs
return await _replace_with_prior(
@@ -249,7 +250,7 @@ async def _replace_with_prior(
if (prior_hashes is not None) and (path in prior_hashes):
n_hashes[path] = prior_hashes[path]
else:
- n_hashes[path] = await attestable.compute_file_hash(file_path)
+ n_hashes[path] = await hashes.compute_file_hash(file_path)
stat_result = await aiofiles.os.stat(file_path)
n_sizes[path] = stat_result.st_size
return prior_hashes
diff --git a/atr/storage/readers/checks.py b/atr/storage/readers/checks.py
index 8c5c3e2..f9b7430 100644
--- a/atr/storage/readers/checks.py
+++ b/atr/storage/readers/checks.py
@@ -22,7 +22,7 @@ import importlib
from typing import TYPE_CHECKING
import atr.db as db
-import atr.hashing as hashing
+import atr.hashes as hashes
import atr.models.sql as sql
import atr.storage as storage
import atr.storage.types as types
@@ -55,9 +55,9 @@ async def _filter_check_results_by_hash(
extra_arg_names = []
extra_args = checks.resolve_extra_args(extra_arg_names, release)
cache_key = await checks.resolve_cache_key(
- policy_keys, release, release.latest_revision_number,
extra_args, file=rel_path.name
+ cr.checker, policy_keys, release,
release.latest_revision_number, extra_args, file=rel_path.name
)
- input_hash_by_module[module_path] =
hashing.compute_dict_hash(cache_key) if cache_key else None
+ input_hash_by_module[module_path] =
hashes.compute_dict_hash(cache_key) if cache_key else None
if cr.inputs_hash == input_hash_by_module[module_path]:
filtered_check_results.append(cr)
diff --git a/atr/tasks/__init__.py b/atr/tasks/__init__.py
index 93797bc..18780bd 100644
--- a/atr/tasks/__init__.py
+++ b/atr/tasks/__init__.py
@@ -25,12 +25,12 @@ from typing import Any, Final
import sqlmodel
import atr.db as db
-import atr.hashing as hashing
+import atr.hashes as hashes
import atr.models.results as results
import atr.models.sql as sql
import atr.tasks.checks as checks
import atr.tasks.checks.compare as compare
-import atr.tasks.checks.file_hash as file_hash
+import atr.tasks.checks.hashing as hashing
import atr.tasks.checks.license as license
import atr.tasks.checks.paths as paths
import atr.tasks.checks.rat as rat
@@ -64,6 +64,7 @@ async def asc_checks(
data,
signature_path,
check_cache_key=await checks.resolve_cache_key(
+ resolve(sql.TaskType.SIGNATURE_CHECK),
signature.INPUT_POLICY_KEYS,
release,
revision,
@@ -290,7 +291,7 @@ async def queued(
) -> sql.Task | None:
if check_cache_key is not None:
logging.info("cache key", check_cache_key)
- hash_val = hashing.compute_dict_hash(check_cache_key)
+ hash_val = hashes.compute_dict_hash(check_cache_key)
if not data:
raise RuntimeError("DB Session is required for check_cache_key")
existing = await data.check_result(inputs_hash=hash_val).all()
@@ -317,7 +318,7 @@ def resolve(task_type: sql.TaskType) -> Callable[...,
Awaitable[results.Results
case sql.TaskType.DISTRIBUTION_WORKFLOW:
return gha.trigger_workflow
case sql.TaskType.HASHING_CHECK:
- return file_hash.check
+ return hashing.check
case sql.TaskType.KEYS_IMPORT_FILE:
return keys.import_file
case sql.TaskType.LICENSE_FILES:
@@ -377,10 +378,11 @@ async def sha_checks(
data,
hash_file,
check_cache_key=await checks.resolve_cache_key(
- file_hash.INPUT_POLICY_KEYS,
+ resolve(sql.TaskType.HASHING_CHECK),
+ hashing.INPUT_POLICY_KEYS,
release,
revision,
- checks.resolve_extra_args(file_hash.INPUT_EXTRA_ARGS, release),
+ checks.resolve_extra_args(hashing.INPUT_EXTRA_ARGS, release),
file=hash_file,
),
)
@@ -395,23 +397,56 @@ async def tar_gz_checks(
"""Create check tasks for a .tar.gz or .tgz file."""
# This release has committee, as guaranteed in draft_checks
is_podling = (release.project.committee is not None) and
release.project.committee.is_podling
-
+ compare_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.COMPARE_SOURCE_TREES),
+ compare.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
+ license_h_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.LICENSE_HEADERS),
+ license.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
+ license_f_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.LICENSE_FILES),
+ license.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
+ rat_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.RAT_CHECK),
+ rat.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
+ targz_i_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.TARGZ_INTEGRITY),
+ targz.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
+ targz_s_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.TARGZ_STRUCTURE),
+ targz.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
tasks = [
- queued(
- asf_uid,
- sql.TaskType.COMPARE_SOURCE_TREES,
- release,
- revision,
- data,
- path,
- check_cache_key=await checks.resolve_cache_key(
- compare.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
- ),
+ queued(asf_uid, sql.TaskType.COMPARE_SOURCE_TREES, release, revision,
data, path, check_cache_key=compare_ck),
queued(
asf_uid,
sql.TaskType.LICENSE_FILES,
@@ -419,75 +454,13 @@ async def tar_gz_checks(
revision,
data,
path,
- check_cache_key=await checks.resolve_cache_key(
- license.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
+ check_cache_key=license_f_ck,
extra_args={"is_podling": is_podling},
),
- queued(
- asf_uid,
- sql.TaskType.LICENSE_HEADERS,
- release,
- revision,
- data,
- path,
- check_cache_key=await checks.resolve_cache_key(
- license.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
- ),
- queued(
- asf_uid,
- sql.TaskType.RAT_CHECK,
- release,
- revision,
- data,
- path,
- check_cache_key=await checks.resolve_cache_key(
- rat.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
- ),
- queued(
- asf_uid,
- sql.TaskType.TARGZ_INTEGRITY,
- release,
- revision,
- data,
- path,
- check_cache_key=await checks.resolve_cache_key(
- targz.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
- ),
- queued(
- asf_uid,
- sql.TaskType.TARGZ_STRUCTURE,
- release,
- revision,
- data,
- path,
- check_cache_key=await checks.resolve_cache_key(
- targz.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
- ),
+ queued(asf_uid, sql.TaskType.LICENSE_HEADERS, release, revision, data,
path, check_cache_key=license_h_ck),
+ queued(asf_uid, sql.TaskType.RAT_CHECK, release, revision, data, path,
check_cache_key=rat_ck),
+ queued(asf_uid, sql.TaskType.TARGZ_INTEGRITY, release, revision, data,
path, check_cache_key=targz_i_ck),
+ queued(asf_uid, sql.TaskType.TARGZ_STRUCTURE, release, revision, data,
path, check_cache_key=targz_s_ck),
]
return await asyncio.gather(*tasks)
@@ -526,22 +499,57 @@ async def zip_checks(
"""Create check tasks for a .zip file."""
# This release has committee, as guaranteed in draft_checks
is_podling = (release.project.committee is not None) and
release.project.committee.is_podling
+ compare_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.COMPARE_SOURCE_TREES),
+ compare.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
+ license_h_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.LICENSE_HEADERS),
+ license.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
+ license_f_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.LICENSE_FILES),
+ license.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
+ rat_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.RAT_CHECK),
+ rat.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
+ zip_i_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.ZIPFORMAT_INTEGRITY),
+ zipformat.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
+ zip_s_ck = await checks.resolve_cache_key(
+ resolve(sql.TaskType.ZIPFORMAT_STRUCTURE),
+ zipformat.INPUT_POLICY_KEYS,
+ release,
+ revision,
+ checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
+ file=path,
+ )
+
tasks = [
- queued(
- asf_uid,
- sql.TaskType.COMPARE_SOURCE_TREES,
- release,
- revision,
- data,
- path,
- check_cache_key=await checks.resolve_cache_key(
- compare.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(compare.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
- ),
+ queued(asf_uid, sql.TaskType.COMPARE_SOURCE_TREES, release, revision,
data, path, check_cache_key=compare_ck),
queued(
asf_uid,
sql.TaskType.LICENSE_FILES,
@@ -549,75 +557,13 @@ async def zip_checks(
revision,
data,
path,
- check_cache_key=await checks.resolve_cache_key(
- license.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
+ check_cache_key=license_f_ck,
extra_args={"is_podling": is_podling},
),
- queued(
- asf_uid,
- sql.TaskType.LICENSE_HEADERS,
- release,
- revision,
- data,
- path,
- check_cache_key=await checks.resolve_cache_key(
- license.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(license.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
- ),
- queued(
- asf_uid,
- sql.TaskType.RAT_CHECK,
- release,
- revision,
- data,
- path,
- check_cache_key=await checks.resolve_cache_key(
- rat.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
- ),
- queued(
- asf_uid,
- sql.TaskType.TARGZ_INTEGRITY,
- release,
- revision,
- data,
- path,
- check_cache_key=await checks.resolve_cache_key(
- zipformat.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
- ),
- queued(
- asf_uid,
- sql.TaskType.TARGZ_STRUCTURE,
- release,
- revision,
- data,
- path,
- check_cache_key=await checks.resolve_cache_key(
- zipformat.INPUT_POLICY_KEYS,
- release,
- revision,
- checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
- file=path,
- ),
- ),
+ queued(asf_uid, sql.TaskType.LICENSE_HEADERS, release, revision, data,
path, check_cache_key=license_h_ck),
+ queued(asf_uid, sql.TaskType.RAT_CHECK, release, revision, data, path,
check_cache_key=rat_ck),
+ queued(asf_uid, sql.TaskType.ZIPFORMAT_INTEGRITY, release, revision,
data, path, check_cache_key=zip_i_ck),
+ queued(asf_uid, sql.TaskType.ZIPFORMAT_STRUCTURE, release, revision,
data, path, check_cache_key=zip_s_ck),
]
return await asyncio.gather(*tasks)
diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py
index f340979..0082e18 100644
--- a/atr/tasks/checks/__init__.py
+++ b/atr/tasks/checks/__init__.py
@@ -36,7 +36,7 @@ import atr.attestable as attestable
import atr.config as config
import atr.db as db
import atr.file_paths as file_paths
-import atr.hashing as hashing
+import atr.hashes as hashes
import atr.log as log
import atr.models.sql as sql
import atr.util as util
@@ -78,7 +78,7 @@ class Recorder:
member_rel_path: str | None = None,
afresh: bool = True,
) -> None:
- self.checker = function_key(checker) if callable(checker) else checker
+ self.checker = function_key(checker)
self.release_name = sql.release_name(project_name, version_name)
self.revision_number = revision_number
self.primary_rel_path = primary_rel_path
@@ -223,9 +223,9 @@ class Recorder:
name=self.release_name, _release_policy=True,
_project_release_policy=True
).demand(RuntimeError(f"Release {self.release_name} not found"))
cache_key = await resolve_cache_key(
- policy_keys, release, self.revision_number, input_args,
file=self.primary_rel_path
+ self.checker, policy_keys, release, self.revision_number,
input_args, file=self.primary_rel_path
)
- self.__input_hash = hashing.compute_dict_hash(cache_key) if
cache_key else None
+ self.__input_hash = hashes.compute_dict_hash(cache_key) if
cache_key else None
return True
@property
@@ -346,11 +346,12 @@ class Recorder:
)
-def function_key(func: Callable[..., Any]) -> str:
- return func.__module__ + "." + func.__name__
+def function_key(func: Callable[..., Any] | str) -> str:
+ return func.__module__ + "." + func.__name__ if callable(func) else func
async def resolve_cache_key(
+ checker: str | Callable[..., Any],
policy_keys: list[str],
release: sql.Release,
revision: str,
@@ -372,8 +373,8 @@ async def resolve_cache_key(
if path is None:
# We know file isn't None here but type checker doesn't
path = file_paths.revision_path_for_file(release.project_name,
release.version, revision, file or "")
- file_hash = await hashing.compute_file_hash(path)
- cache_key = {"file_hash": file_hash}
+ file_hash = await hashes.compute_file_hash(path)
+ cache_key = {"file_hash": file_hash, "checker": function_key(checker)}
if len(policy_keys) > 0 and policy is not None:
policy_dict = policy.model_dump(exclude_none=True)
diff --git a/atr/tasks/checks/file_hash.py b/atr/tasks/checks/hashing.py
similarity index 100%
rename from atr/tasks/checks/file_hash.py
rename to atr/tasks/checks/hashing.py
diff --git a/atr/tasks/checks/paths.py b/atr/tasks/checks/paths.py
index d26aa2f..bf0974e 100644
--- a/atr/tasks/checks/paths.py
+++ b/atr/tasks/checks/paths.py
@@ -23,7 +23,7 @@ from typing import Final
import aiofiles.os
import atr.analysis as analysis
-import atr.hashing as hashing
+import atr.hashes as hashes
import atr.log as log
import atr.models.results as results
import atr.tasks.checks as checks
@@ -188,8 +188,8 @@ async def _check_path_process_single(
full_path = base_path / relative_path
relative_path_str = str(relative_path)
- file_hash = await hashing.compute_file_hash(full_path)
- inputs_hash = hashing.compute_dict_hash({"file_hash": file_hash,
"is_podling": is_podling})
+ file_hash = await hashes.compute_file_hash(full_path)
+ inputs_hash = hashes.compute_dict_hash({"file_hash": file_hash,
"is_podling": is_podling})
# For debugging and testing
if (await user.is_admin_async(asf_uid)) and (full_path.name ==
"deliberately_slow_ATR_task_filename.txt"):
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]