This is an automated email from the ASF dual-hosted git repository.
arm pushed a commit to branch arm
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/arm by this push:
new ef1c4b9e #641 - some initial migrations out of the util module for
paths and hash calculation.
ef1c4b9e is described below
commit ef1c4b9e79a15776dce99d1c6f7451ee6b31df27
Author: Alastair McFarlane <[email protected]>
AuthorDate: Tue Feb 24 11:56:55 2026 +0000
#641 - some initial migrations out of the util module for paths and hash
calculation.
---
atr/admin/__init__.py | 7 ++-
atr/api/__init__.py | 16 ++---
atr/attestable.py | 11 ++--
atr/construct.py | 5 +-
atr/file_paths.py | 28 ---------
atr/get/checks.py | 31 +++++-----
atr/get/download.py | 7 ++-
atr/get/draft.py | 3 +-
atr/get/file.py | 7 ++-
atr/get/finish.py | 3 +-
atr/get/published.py | 3 +-
atr/get/report.py | 3 +-
atr/get/revisions.py | 3 +-
atr/get/test.py | 3 +-
atr/get/voting.py | 5 +-
atr/hashes.py | 25 +++++++-
atr/paths.py | 131 ++++++++++++++++++++++++++++++++++++++++
atr/post/upload.py | 5 +-
atr/server.py | 15 ++---
atr/shared/web.py | 13 ++--
atr/ssh.py | 3 +-
atr/storage/readers/releases.py | 9 +--
atr/storage/writers/announce.py | 7 ++-
atr/storage/writers/keys.py | 5 +-
atr/storage/writers/policy.py | 18 +++---
atr/storage/writers/release.py | 9 +--
atr/storage/writers/revision.py | 11 ++--
atr/tasks/__init__.py | 3 +-
atr/tasks/checks/__init__.py | 12 ++--
atr/tasks/checks/compare.py | 3 +-
atr/tasks/sbom.py | 11 ++--
atr/util.py | 126 +-------------------------------------
atr/validate.py | 4 +-
tests/unit/test_paths.py | 45 ++++++++++++++
tests/unit/test_util.py | 23 -------
35 files changed, 331 insertions(+), 282 deletions(-)
diff --git a/atr/admin/__init__.py b/atr/admin/__init__.py
index 27dd52d2..0a97d2f8 100644
--- a/atr/admin/__init__.py
+++ b/atr/admin/__init__.py
@@ -49,6 +49,7 @@ import atr.log as log
import atr.mapping as mapping
import atr.models.session
import atr.models.sql as sql
+import atr.paths as paths
import atr.principal as principal
import atr.storage as storage
import atr.storage.outcome as outcome
@@ -205,7 +206,7 @@ async def consistency(session: web.Committer) ->
web.TextResponse:
releases = await data.release().all()
database_dirs = []
for release in releases:
- path = util.release_directory_version(release)
+ path = paths.release_directory_version(release)
database_dirs.append(str(path))
if len(set(database_dirs)) != len(database_dirs):
raise base.ASFQuartException("Duplicate release directories in
database", errorcode=500)
@@ -1092,7 +1093,7 @@ async def _get_filesystem_dirs() -> list[str]:
async def _get_filesystem_dirs_finished(filesystem_dirs: list[str]) -> None:
- finished_dir = util.get_finished_dir()
+ finished_dir = paths.get_finished_dir()
finished_dir_contents = await aiofiles.os.listdir(finished_dir)
for project_dir in finished_dir_contents:
project_dir_path = os.path.join(finished_dir, project_dir)
@@ -1105,7 +1106,7 @@ async def _get_filesystem_dirs_finished(filesystem_dirs:
list[str]) -> None:
async def _get_filesystem_dirs_unfinished(filesystem_dirs: list[str]) -> None:
- unfinished_dir = util.get_unfinished_dir()
+ unfinished_dir = paths.get_unfinished_dir()
unfinished_dir_contents = await aiofiles.os.listdir(unfinished_dir)
for project_dir in unfinished_dir_contents:
project_dir_path = os.path.join(unfinished_dir, project_dir)
diff --git a/atr/api/__init__.py b/atr/api/__init__.py
index 432af5f8..7e2c6117 100644
--- a/atr/api/__init__.py
+++ b/atr/api/__init__.py
@@ -33,10 +33,12 @@ import atr.blueprints.api as api
import atr.config as config
import atr.db as db
import atr.db.interaction as interaction
+import atr.hashes as hashes
import atr.jwtoken as jwtoken
import atr.log as log
import atr.models as models
import atr.models.sql as sql
+import atr.paths as paths
import atr.principal as principal
import atr.storage as storage
import atr.storage.outcome as outcome
@@ -949,10 +951,10 @@ async def release_paths(project: str, version: str,
revision: str | None = None)
release_name = sql.release_name(project, version)
release = await
data.release(name=release_name).demand(exceptions.NotFound())
if revision is None:
- dir_path = util.release_directory(release)
+ dir_path = paths.release_directory(release)
else:
await data.revision(release_name=release_name,
number=revision).demand(exceptions.NotFound())
- dir_path = util.release_directory_version(release) / revision
+ dir_path = paths.release_directory_version(release) / revision
if not (await aiofiles.os.path.isdir(dir_path)):
raise exceptions.NotFound("Files not found")
files: list[str] = [str(path) for path in [p async for p in
util.paths_recursive(dir_path)]]
@@ -1086,14 +1088,14 @@ async def signature_provenance(data:
models.api.SignatureProvenanceArgs) -> Dict
)
)
- downloads_dir = util.get_downloads_dir()
- matched_committee_names = await _match_committee_names(key.committees,
util.get_finished_dir(), data)
+ downloads_dir = paths.get_downloads_dir()
+ matched_committee_names = await _match_committee_names(key.committees,
paths.get_finished_dir(), data)
for matched_committee_name in matched_committee_names:
keys_file_path = downloads_dir / matched_committee_name / "KEYS"
async with aiofiles.open(keys_file_path, "rb") as f:
keys_file_data = await f.read()
- keys_file_sha3_256 = hashlib.sha3_256(keys_file_data).hexdigest()
+ keys_file_sha3_256 = hashes.compute_sha3_256(keys_file_data)
signing_keys.append(
models.api.SignatureProvenanceKey(
committee=matched_committee_name,
@@ -1422,7 +1424,7 @@ async def _match_committee_names(
key_committees: list[sql.Committee], finished_dir: pathlib.Path, data:
models.api.SignatureProvenanceArgs
) -> set[str]:
key_committee_names = set(committee.name for committee in key_committees)
- finished_dir = util.get_finished_dir()
+ finished_dir = paths.get_finished_dir()
matched_committee_names = set()
# Check for finished files
@@ -1446,7 +1448,7 @@ async def _match_committee_names(
projects = await
db_data.project(committee_name=key_committee_name).all()
for project in projects:
releases = await
db_data.release(project_name=project.name).all()
- release_directories.extend(util.release_directory(release) for
release in releases)
+ release_directories.extend(paths.release_directory(release)
for release in releases)
for release_directory in release_directories:
if await _match_unfinished(release_directory, data):
matched_committee_names.add(key_committee_name)
diff --git a/atr/attestable.py b/atr/attestable.py
index 91a65470..eedf927d 100644
--- a/atr/attestable.py
+++ b/atr/attestable.py
@@ -27,6 +27,7 @@ import pydantic
import atr.hashes as hashes
import atr.log as log
import atr.models.attestable as models
+import atr.paths as paths
import atr.util as util
if TYPE_CHECKING:
@@ -34,19 +35,19 @@ if TYPE_CHECKING:
def attestable_checks_path(project_name: str, version_name: str,
revision_number: str) -> pathlib.Path:
- return util.get_attestable_dir() / project_name / version_name /
f"{revision_number}.checks.json"
+ return paths.get_attestable_dir() / project_name / version_name /
f"{revision_number}.checks.json"
def attestable_path(project_name: str, version_name: str, revision_number:
str) -> pathlib.Path:
- return util.get_attestable_dir() / project_name / version_name /
f"{revision_number}.json"
+ return paths.get_attestable_dir() / project_name / version_name /
f"{revision_number}.json"
def attestable_paths_path(project_name: str, version_name: str,
revision_number: str) -> pathlib.Path:
- return util.get_attestable_dir() / project_name / version_name /
f"{revision_number}.paths.json"
+ return paths.get_attestable_dir() / project_name / version_name /
f"{revision_number}.paths.json"
def github_tp_payload_path(project_name: str, version_name: str,
revision_number: str) -> pathlib.Path:
- return util.get_attestable_dir() / project_name / version_name /
f"{revision_number}.github-tp.json"
+ return paths.get_attestable_dir() / project_name / version_name /
f"{revision_number}.github-tp.json"
async def github_tp_payload_write(
@@ -116,7 +117,7 @@ async def load_paths(
def migrate_to_paths_files() -> int:
- attestable_dir = util.get_attestable_dir()
+ attestable_dir = paths.get_attestable_dir()
if not attestable_dir.is_dir():
return 0
count = 0
diff --git a/atr/construct.py b/atr/construct.py
index 55e8a689..f36ba1a6 100644
--- a/atr/construct.py
+++ b/atr/construct.py
@@ -26,6 +26,7 @@ import quart
import atr.config as config
import atr.db as db
import atr.models.sql as sql
+import atr.paths as paths
import atr.util as util
type Context = Literal["announce", "announce_subject", "checklist", "vote",
"vote_subject"]
@@ -227,11 +228,11 @@ async def start_vote_subject_and_body(subject: str, body:
str, options: StartVot
# Therefore there is no route handler, so we have to construct the URL
manually
keys_file = None
if committee.is_podling:
- keys_file_path = util.get_downloads_dir() / "incubator" /
committee.name / "KEYS"
+ keys_file_path = paths.get_downloads_dir() / "incubator" /
committee.name / "KEYS"
if await aiofiles.os.path.isfile(keys_file_path):
keys_file =
f"https://{host}/downloads/incubator/{committee.name}/KEYS"
else:
- keys_file_path = util.get_downloads_dir() / committee.name / "KEYS"
+ keys_file_path = paths.get_downloads_dir() / committee.name / "KEYS"
if await aiofiles.os.path.isfile(keys_file_path):
keys_file = f"https://{host}/downloads/{committee.name}/KEYS"
diff --git a/atr/file_paths.py b/atr/file_paths.py
deleted file mode 100644
index d29d6b96..00000000
--- a/atr/file_paths.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import pathlib
-
-import atr.util as util
-
-
-def base_path_for_revision(project_name: str, version_name: str, revision:
str) -> pathlib.Path:
- return pathlib.Path(util.get_unfinished_dir(), project_name, version_name,
revision)
-
-
-def revision_path_for_file(project_name: str, version_name: str, revision:
str, file_name: str) -> pathlib.Path:
- return base_path_for_revision(project_name, version_name, revision) /
file_name
diff --git a/atr/get/checks.py b/atr/get/checks.py
index 36b7b4dc..8faa5ce2 100644
--- a/atr/get/checks.py
+++ b/atr/get/checks.py
@@ -34,6 +34,7 @@ import atr.get.sbom as sbom
import atr.get.vote as vote
import atr.htm as htm
import atr.models.sql as sql
+import atr.paths as paths
import atr.post as post
import atr.render as render
import atr.shared as shared
@@ -85,14 +86,14 @@ class FileStats(NamedTuple):
async def get_file_totals(release: sql.Release, session: web.Committer | None)
-> FileStats:
"""Get file level check totals after ignores are applied."""
- base_path = util.release_directory(release)
- paths = [path async for path in util.paths_recursive(base_path)]
+ base_path = paths.release_directory(release)
+ all_paths = [path async for path in util.paths_recursive(base_path)]
async with storage.read(session) as read:
ragp = read.as_general_public()
match_ignore = await ragp.checks.ignores_matcher(release.project_name)
- _, totals = await _compute_stats(release, paths, match_ignore)
+ _, totals = await _compute_stats(release, all_paths, match_ignore)
return totals
@@ -110,22 +111,22 @@ async def selected(session: web.Committer | None,
project_name: str, version_nam
if release.committee is None:
raise ValueError("Release has no committee")
- base_path = util.release_directory(release)
- paths = [path async for path in util.paths_recursive(base_path)]
- paths.sort()
+ base_path = paths.release_directory(release)
+ all_paths = [path async for path in util.paths_recursive(base_path)]
+ all_paths.sort()
async with storage.read(session) as read:
ragp = read.as_general_public()
match_ignore = await ragp.checks.ignores_matcher(release.project_name)
- per_file_stats, totals = await _compute_stats(release, paths, match_ignore)
+ per_file_stats, totals = await _compute_stats(release, all_paths,
match_ignore)
page = htm.Block()
_render_header(page, release)
- _render_summary(page, totals, paths, per_file_stats)
- _render_checks_table(page, release, paths, per_file_stats)
+ _render_summary(page, totals, all_paths, per_file_stats)
+ _render_checks_table(page, release, all_paths, per_file_stats)
_render_ignores_section(page, release)
- _render_debug_table(page, paths, per_file_stats)
+ _render_debug_table(page, all_paths, per_file_stats)
return await template.blank(
f"File checks for {release.project.short_display_name}
{release.version}",
@@ -150,13 +151,13 @@ async def selected_revision(
_project_release_policy=True,
).demand(base.ASFQuartException("Release does not exist",
errorcode=404))
- base_path = util.release_directory(release)
- paths = [path async for path in util.paths_recursive(base_path)]
- paths.sort()
+ base_path = paths.release_directory(release)
+ all_paths = [path async for path in util.paths_recursive(base_path)]
+ all_paths.sort()
async with storage.read(session) as read:
ragp = read.as_general_public()
- info = await ragp.releases.path_info(release, paths)
+ info = await ragp.releases.path_info(release, all_paths)
ongoing_count = await interaction.tasks_ongoing(project_name,
version_name, revision_number)
@@ -165,7 +166,7 @@ async def selected_revision(
delete_file_forms: dict[str, str] = {}
if release.phase == sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT:
- for path in paths:
+ for path in all_paths:
delete_file_forms[str(path)] = str(
form.render(
model_cls=draft.DeleteFileForm,
diff --git a/atr/get/download.py b/atr/get/download.py
index 7a25aea9..bb8d5145 100644
--- a/atr/get/download.py
+++ b/atr/get/download.py
@@ -31,6 +31,7 @@ import atr.form as form
import atr.htm as htm
import atr.mapping as mapping
import atr.models.sql as sql
+import atr.paths as paths
import atr.template as template
import atr.util as util
import atr.web as web
@@ -116,7 +117,7 @@ async def zip_selected(session: web.Committer,
project_name: str, version_name:
except Exception as e:
return web.TextResponse(f"Server error: {e}", status=500)
- base_dir = util.release_directory(release)
+ base_dir = paths.release_directory(release)
files_to_zip = []
try:
async for rel_path in util.paths_recursive(base_dir):
@@ -155,7 +156,7 @@ async def _download_or_list(project_name: str,
version_name: str, file_path: str
release = await data.release(project_name=project_name,
version=version_name).demand(
base.ASFQuartException("Release does not exist", errorcode=404)
)
- full_path = util.release_directory(release) / validated_path
+ full_path = paths.release_directory(release) / validated_path
if await aiofiles.os.path.isdir(full_path):
return await _list(validated_path, full_path, project_name,
version_name, str(validated_path))
@@ -175,7 +176,7 @@ async def _download_or_list(project_name: str,
version_name: str, file_path: str
async def _generate_file_url_list(release: sql.Release) -> str:
- base_dir = util.release_directory(release)
+ base_dir = paths.release_directory(release)
urls = []
async for rel_path in util.paths_recursive(base_dir):
full_item_path = base_dir / rel_path
diff --git a/atr/get/draft.py b/atr/get/draft.py
index 763584a5..16a091ac 100644
--- a/atr/get/draft.py
+++ b/atr/get/draft.py
@@ -24,6 +24,7 @@ import asfquart.base as base
import atr.blueprints.get as get
import atr.form as form
+import atr.paths as paths
import atr.post as post
import atr.shared as shared
import atr.template as template
@@ -41,7 +42,7 @@ async def tools(session: web.Committer, project_name: str,
version_name: str, fi
raise base.ASFQuartException("Invalid file path", errorcode=400)
release = await session.release(project_name, version_name)
- full_path = str(util.release_directory(release) / validated_path)
+ full_path = str(paths.release_directory(release) / validated_path)
# Check that the file exists
if not await aiofiles.os.path.exists(full_path):
diff --git a/atr/get/file.py b/atr/get/file.py
index b4f968f4..2c004b55 100644
--- a/atr/get/file.py
+++ b/atr/get/file.py
@@ -24,6 +24,7 @@ import atr.get.finish as finish
import atr.get.vote as vote
import atr.htm as htm
import atr.models.sql as sql
+import atr.paths as paths
import atr.render as render
import atr.template as template
import atr.util as util
@@ -42,11 +43,11 @@ async def selected(session: web.Committer, project_name:
str, version_name: str)
revision_number = release.latest_revision_number
file_stats = []
if release.phase == sql.ReleasePhase.RELEASE:
- file_stats = [stat async for stat in
util.content_list(util.get_finished_dir(), project_name, version_name)]
+ file_stats = [stat async for stat in
util.content_list(paths.get_finished_dir(), project_name, version_name)]
elif revision_number is not None:
file_stats = [
stat
- async for stat in util.content_list(util.get_unfinished_dir(),
project_name, version_name, revision_number)
+ async for stat in util.content_list(paths.get_unfinished_dir(),
project_name, version_name, revision_number)
]
else:
raise ValueError("No revision number found for unfinished release")
@@ -135,7 +136,7 @@ async def selected_path(session: web.Committer,
project_name: str, version_name:
release = await session.release(project_name, version_name, phase=None)
_max_view_size = 512 * 1024
- full_path = util.release_directory(release) / validated_path
+ full_path = paths.release_directory(release) / validated_path
content_listing = await util.archive_listing(full_path)
content, is_text, is_truncated, error_message = await
util.read_file_for_viewer(full_path, _max_view_size)
diff --git a/atr/get/finish.py b/atr/get/finish.py
index 39d36907..ac95b2c1 100644
--- a/atr/get/finish.py
+++ b/atr/get/finish.py
@@ -40,6 +40,7 @@ import atr.get.root as root
import atr.htm as htm
import atr.mapping as mapping
import atr.models.sql as sql
+import atr.paths as paths
import atr.render as render
import atr.shared as shared
import atr.tasks.gha as gha
@@ -178,7 +179,7 @@ async def _get_page_data(
if release.phase != sql.ReleasePhase.RELEASE_PREVIEW:
raise ValueError("Release is not in preview phase")
- latest_revision_dir = util.release_directory(release)
+ latest_revision_dir = paths.release_directory(release)
source_files_rel, target_dirs = await
_sources_and_targets(latest_revision_dir)
deletable_dirs = await _deletable_choices(latest_revision_dir, target_dirs)
rc_analysis_result = await _analyse_rc_tags(latest_revision_dir)
diff --git a/atr/get/published.py b/atr/get/published.py
index cce3185d..fe6dacff 100644
--- a/atr/get/published.py
+++ b/atr/get/published.py
@@ -25,6 +25,7 @@ import quart
import atr.blueprints.get as get
import atr.form as form
import atr.htm as htm
+import atr.paths as paths
import atr.util as util
import atr.web as web
@@ -105,7 +106,7 @@ async def _file_content(full_path: pathlib.Path) ->
web.QuartResponse:
async def _path(session: web.Committer, path: str) -> web.QuartResponse:
- downloads_path = util.get_downloads_dir()
+ downloads_path = paths.get_downloads_dir()
full_path = downloads_path / path
if await aiofiles.os.path.isdir(full_path):
return await _directory_listing(full_path, path)
diff --git a/atr/get/report.py b/atr/get/report.py
index c987755a..a3d194c9 100644
--- a/atr/get/report.py
+++ b/atr/get/report.py
@@ -23,6 +23,7 @@ import asfquart.base as base
import atr.blueprints.get as get
import atr.form as form
import atr.models.sql as sql
+import atr.paths as paths
import atr.storage as storage
import atr.template as template
import atr.util as util
@@ -57,7 +58,7 @@ async def selected_path(session: web.Committer, project_name:
str, version_name:
raise base.ASFQuartException("Release has no committee", errorcode=500)
# TODO: When we do more than one thing in a dir, we should use the
revision directory directly
- abs_path = util.release_directory(release) / validated_path
+ abs_path = paths.release_directory(release) / validated_path
if release.latest_revision_number is None:
raise base.ASFQuartException("Release has no revision", errorcode=500)
diff --git a/atr/get/revisions.py b/atr/get/revisions.py
index aeb805bd..680227a1 100644
--- a/atr/get/revisions.py
+++ b/atr/get/revisions.py
@@ -33,6 +33,7 @@ import atr.get.root as root
import atr.htm as htm
import atr.models.schema as schema
import atr.models.sql as sql
+import atr.paths as paths
import atr.post as post
import atr.shared as shared
import atr.template as template
@@ -57,7 +58,7 @@ async def selected(session: web.Committer, project_name: str,
version_name: str)
except base.ASFQuartException:
release = await session.release(project_name, version_name,
phase=sql.ReleasePhase.RELEASE_PREVIEW)
phase_key = "preview"
- release_dir = util.release_directory_base(release)
+ release_dir = paths.release_directory_base(release)
# Determine the current revision
latest_revision_number = release.latest_revision_number
diff --git a/atr/get/test.py b/atr/get/test.py
index 73daa1b7..ad70cd64 100644
--- a/atr/get/test.py
+++ b/atr/get/test.py
@@ -31,6 +31,7 @@ import atr.get.vote as vote
import atr.htm as htm
import atr.models.session
import atr.models.sql as sql
+import atr.paths as paths
import atr.shared as shared
import atr.storage as storage
import atr.template as template
@@ -116,7 +117,7 @@ async def test_merge(session: web.Committer, project_name:
str, version_name: st
release = await data.release(name=release_name, _project=True).demand(
RuntimeError("Release not found after merge test")
)
- release_dir = util.release_directory(release)
+ release_dir = paths.release_directory(release)
async for path in util.paths_recursive(release_dir):
files.append(str(path))
diff --git a/atr/get/voting.py b/atr/get/voting.py
index 83eea195..665cff66 100644
--- a/atr/get/voting.py
+++ b/atr/get/voting.py
@@ -29,6 +29,7 @@ import atr.get.keys as keys
import atr.get.projects as projects
import atr.htm as htm
import atr.models.sql as sql
+import atr.paths as paths
import atr.post as post
import atr.render as render
import atr.shared as shared
@@ -102,9 +103,9 @@ async def selected_revision(
async def _check_keys_warning(committee: sql.Committee) -> bool:
if committee.is_podling:
- keys_file_path = util.get_downloads_dir() / "incubator" /
committee.name / "KEYS"
+ keys_file_path = paths.get_downloads_dir() / "incubator" /
committee.name / "KEYS"
else:
- keys_file_path = util.get_downloads_dir() / committee.name / "KEYS"
+ keys_file_path = paths.get_downloads_dir() / committee.name / "KEYS"
return not await aiofiles.os.path.isfile(keys_file_path)
diff --git a/atr/hashes.py b/atr/hashes.py
index de29a760..35c5ac0c 100644
--- a/atr/hashes.py
+++ b/atr/hashes.py
@@ -14,7 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+import hashlib
import pathlib
from typing import Any, Final
@@ -40,3 +40,26 @@ async def compute_file_hash(path: str | pathlib.Path) -> str:
while chunk := await f.read(_HASH_CHUNK_SIZE):
hasher.update(chunk)
return f"blake3:{hasher.hexdigest()}"
+
+
+def compute_sha3_256(file_data: bytes) -> str:
+ """Compute SHA3-256 hash of file data."""
+ return hashlib.sha3_256(file_data).hexdigest()
+
+
+async def compute_sha512(file_path: pathlib.Path) -> str:
+ """Compute SHA-512 hash of a file."""
+ sha512 = hashlib.sha512()
+ async with aiofiles.open(file_path, "rb") as f:
+ while chunk := await f.read(4096):
+ sha512.update(chunk)
+ return sha512.hexdigest()
+
+
+async def file_sha3(path: str) -> str:
+ """Compute SHA3-256 hash of a file."""
+ sha3 = hashlib.sha3_256()
+ async with aiofiles.open(path, "rb") as f:
+ while chunk := await f.read(4096):
+ sha3.update(chunk)
+ return sha3.hexdigest()
diff --git a/atr/paths.py b/atr/paths.py
new file mode 100644
index 00000000..0bc556e0
--- /dev/null
+++ b/atr/paths.py
@@ -0,0 +1,131 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pathlib
+
+from atr import config as config
+from atr.models import sql as sql
+
+
+def base_path_for_revision(project_name: str, version_name: str, revision:
str) -> pathlib.Path:
+ return pathlib.Path(get_unfinished_dir(), project_name, version_name,
revision)
+
+
+def revision_path_for_file(project_name: str, version_name: str, revision:
str, file_name: str) -> pathlib.Path:
+ return base_path_for_revision(project_name, version_name, revision) /
file_name
+
+
+def get_attestable_dir() -> pathlib.Path:
+ return pathlib.Path(config.get().ATTESTABLE_STORAGE_DIR)
+
+
+def get_downloads_dir() -> pathlib.Path:
+ return pathlib.Path(config.get().DOWNLOADS_STORAGE_DIR)
+
+
+def get_finished_dir() -> pathlib.Path:
+ return pathlib.Path(config.get().FINISHED_STORAGE_DIR)
+
+
+def get_quarantined_dir() -> pathlib.Path:
+ return pathlib.Path(config.get().STATE_DIR) / "quarantined"
+
+
+def get_tmp_dir() -> pathlib.Path:
+ # This must be on the same filesystem as the other state subdirectories
+ return pathlib.Path(config.get().STATE_DIR) / "temporary"
+
+
+def get_unfinished_dir() -> pathlib.Path:
+ return pathlib.Path(config.get().UNFINISHED_STORAGE_DIR)
+
+
+def get_upload_staging_dir(session_token: str) -> pathlib.Path:
+ if not session_token.isalnum():
+ raise ValueError("Invalid session token")
+ return get_tmp_dir() / "upload-staging" / session_token
+
+
+def quarantine_directory(quarantined: sql.Quarantined) -> pathlib.Path:
+ if not quarantined.token.isalnum():
+ raise ValueError("Invalid quarantine token")
+ release = quarantined.release
+ return get_quarantined_dir() / release.project_name / release.version /
quarantined.token
+
+
+def release_directory(release: sql.Release) -> pathlib.Path:
+ """Return the absolute path to the directory containing the active files
for a given release phase."""
+ latest_revision_number = release.latest_revision_number
+ if (release.phase == sql.ReleasePhase.RELEASE) or (latest_revision_number
is None):
+ return release_directory_base(release)
+ return release_directory_base(release) / latest_revision_number
+
+
+def release_directory_base(release: sql.Release) -> pathlib.Path:
+ """Determine the filesystem directory for a given release based on its
phase."""
+ phase = release.phase
+ project_name = release.project.name
+ version_name = release.version
+
+ base_dir: pathlib.Path | None = None
+ match phase:
+ case sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT:
+ base_dir = get_unfinished_dir()
+ case sql.ReleasePhase.RELEASE_CANDIDATE:
+ base_dir = get_unfinished_dir()
+ case sql.ReleasePhase.RELEASE_PREVIEW:
+ base_dir = get_unfinished_dir()
+ case sql.ReleasePhase.RELEASE:
+ base_dir = get_finished_dir()
+ # Do not add "case _" here
+ return base_dir / project_name / version_name
+
+
+def release_directory_revision(release: sql.Release) -> pathlib.Path | None:
+ """Return the path to the directory containing the active files for a
given release phase."""
+ path_project = release.project.name
+ path_version = release.version
+ match release.phase:
+ case (
+ sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
+ | sql.ReleasePhase.RELEASE_CANDIDATE
+ | sql.ReleasePhase.RELEASE_PREVIEW
+ ):
+ if (path_revision := release.latest_revision_number) is None:
+ return None
+ path = get_unfinished_dir() / path_project / path_version /
path_revision
+ case sql.ReleasePhase.RELEASE:
+ path = get_finished_dir() / path_project / path_version
+ # Do not add "case _" here
+ return path
+
+
+def release_directory_version(release: sql.Release) -> pathlib.Path:
+ """Return the version-level directory for a release, without any revision
component: under the unfinished dir before RELEASE, else the finished dir."""
+ path_project = release.project.name
+ path_version = release.version
+ match release.phase:
+ case (
+ sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
+ | sql.ReleasePhase.RELEASE_CANDIDATE
+ | sql.ReleasePhase.RELEASE_PREVIEW
+ ):
+ path = get_unfinished_dir() / path_project / path_version
+ case sql.ReleasePhase.RELEASE:
+ path = get_finished_dir() / path_project / path_version
+ # Do not add "case _" here
+ return path
diff --git a/atr/post/upload.py b/atr/post/upload.py
index bee8a30e..7b4f9f50 100644
--- a/atr/post/upload.py
+++ b/atr/post/upload.py
@@ -32,6 +32,7 @@ import atr.form as form
import atr.get as get
import atr.log as log
import atr.models.sql as sql
+import atr.paths as paths
import atr.shared as shared
import atr.storage as storage
import atr.storage.types as types
@@ -48,7 +49,7 @@ async def finalise(
await session.check_access(project_name)
try:
- staging_dir = util.get_upload_staging_dir(upload_session)
+ staging_dir = paths.get_upload_staging_dir(upload_session)
except ValueError:
return _json_error("Invalid session token", 400)
@@ -123,7 +124,7 @@ async def stage(
await session.check_access(project_name)
try:
- staging_dir = util.get_upload_staging_dir(upload_session)
+ staging_dir = paths.get_upload_staging_dir(upload_session)
except ValueError:
return _json_error("Invalid session token", 400)
diff --git a/atr/server.py b/atr/server.py
index 3119098f..6dcee179 100644
--- a/atr/server.py
+++ b/atr/server.py
@@ -56,6 +56,7 @@ import atr.filters as filters
import atr.log as log
import atr.manager as manager
import atr.models.sql as sql
+import atr.paths as paths
import atr.preload as preload
import atr.ssh as ssh
import atr.svn.pubsub as pubsub
@@ -172,13 +173,13 @@ def _app_dirs_setup(state_dir_str: str, hot_reload: bool)
-> None:
pathlib.Path(state_dir_str) / "runtime",
pathlib.Path(state_dir_str) / "secrets" / "curated",
pathlib.Path(state_dir_str) / "secrets" / "generated",
- util.get_downloads_dir(),
- util.get_finished_dir(),
- util.get_quarantined_dir(),
- util.get_tmp_dir(),
- util.get_unfinished_dir(),
+ paths.get_downloads_dir(),
+ paths.get_finished_dir(),
+ paths.get_quarantined_dir(),
+ paths.get_tmp_dir(),
+ paths.get_unfinished_dir(),
]
- unfinished_dir = util.get_unfinished_dir()
+ unfinished_dir = paths.get_unfinished_dir()
for directory in directories_to_ensure:
directory.mkdir(parents=True, exist_ok=True)
if directory != unfinished_dir:
@@ -927,7 +928,7 @@ async def _reset_request_log_context():
def _set_file_permissions_to_read_only() -> None:
"""Set permissions of all files in the unfinished and finished directories
to read only."""
# TODO: After a migration period, incorrect permissions should be an error
- directories = [util.get_unfinished_dir(), util.get_finished_dir()]
+ directories = [paths.get_unfinished_dir(), paths.get_finished_dir()]
fixed_count = 0
for directory in directories:
if not directory.exists():
diff --git a/atr/shared/web.py b/atr/shared/web.py
index 03ab044f..ee71ac40 100644
--- a/atr/shared/web.py
+++ b/atr/shared/web.py
@@ -25,6 +25,7 @@ import atr.get as get
import atr.htm as htm
import atr.models.results as results
import atr.models.sql as sql
+import atr.paths as paths
import atr.post as post
import atr.shared.draft as draft
import atr.storage as storage
@@ -48,16 +49,16 @@ async def check(
can_vote: bool = False,
can_resolve: bool = False,
) -> web.WerkzeugResponse | str:
- base_path = util.release_directory(release)
+ base_path = paths.release_directory(release)
# TODO: This takes 180ms for providers
# We could cache it
- paths = [path async for path in util.paths_recursive(base_path)]
- paths.sort()
+ all_paths = [path async for path in util.paths_recursive(base_path)]
+ all_paths.sort()
async with storage.read(session) as read:
ragp = read.as_general_public()
- info = await ragp.releases.path_info(release, paths)
+ info = await ragp.releases.path_info(release, all_paths)
user_ssh_keys: Sequence[sql.SSHKey] = []
asf_id: str | None = None
@@ -95,7 +96,7 @@ async def check(
)
delete_file_forms: dict[str, htm.Element] = {}
- for path in paths:
+ for path in all_paths:
delete_file_forms[str(path)] = form.render(
model_cls=draft.DeleteFileForm,
action=util.as_url(post.draft.delete_file,
project_name=release.project.name, version_name=release.version),
@@ -130,7 +131,7 @@ async def check(
vote_task_warnings = _warnings_from_vote_result(vote_task)
has_files = await util.has_files(release)
- has_any_errors = any(info.errors.get(path, []) for path in paths) if info else False
+ has_any_errors = any(info.errors.get(path, []) for path in all_paths) if info else False
strict_checking = release.project.policy_strict_checking
strict_checking_errors = strict_checking and has_any_errors
blocker_errors = False
diff --git a/atr/ssh.py b/atr/ssh.py
index 6f9d1433..567a4085 100644
--- a/atr/ssh.py
+++ b/atr/ssh.py
@@ -41,6 +41,7 @@ import atr.config as config
import atr.db as db
import atr.log as log
import atr.models.sql as sql
+import atr.paths as paths
import atr.storage as storage
import atr.storage.types as types
import atr.user as user
@@ -521,7 +522,7 @@ async def _step_07a_process_validated_rsync_read(
exit_status = 1
try:
# Determine the source directory based on the release phase and
revision
- source_dir = util.release_directory(release)
+ source_dir = paths.release_directory(release)
log.info(
f"Identified source directory for read: {source_dir} for release "
f"{release.name} (phase {release.phase.value})"
diff --git a/atr/storage/readers/releases.py b/atr/storage/readers/releases.py
index 18a48a56..77d330fb 100644
--- a/atr/storage/readers/releases.py
+++ b/atr/storage/readers/releases.py
@@ -25,6 +25,7 @@ import atr.classify as classify
import atr.db as db
import atr.db.interaction as interaction
import atr.models.sql as sql
+import atr.paths as paths
import atr.storage as storage
import atr.storage.types as types
import atr.util as util
@@ -54,20 +55,20 @@ class GeneralPublic:
self.__data = data
self.__asf_uid = read.authorisation.asf_uid
- async def path_info(self, release: sql.Release, paths: list[pathlib.Path]) -> types.PathInfo | None:
+ async def path_info(self, release: sql.Release, all_paths: list[pathlib.Path]) -> types.PathInfo | None:
info = types.PathInfo()
latest_revision_number = release.latest_revision_number
if latest_revision_number is None:
return None
await self.__successes_errors_warnings(release,
latest_revision_number, info)
- base_path = util.release_directory(release)
+ base_path = paths.release_directory(release)
source_matcher = None
source_artifact_paths = release.project.policy_source_artifact_paths
if source_artifact_paths:
source_matcher = util.create_path_matcher(source_artifact_paths,
None, base_path)
- for path in paths:
+ for path in all_paths:
info.file_types[path] = classify.classify(path,
base_path=base_path, source_matcher=source_matcher)
- self.__compute_checker_stats(info, paths)
+ self.__compute_checker_stats(info, all_paths)
return info
def __accumulate_results(
diff --git a/atr/storage/writers/announce.py b/atr/storage/writers/announce.py
index 5db5a945..aa9b0a88 100644
--- a/atr/storage/writers/announce.py
+++ b/atr/storage/writers/announce.py
@@ -30,6 +30,7 @@ import sqlmodel
import atr.construct as construct
import atr.db as db
import atr.models.sql as sql
+import atr.paths as paths
import atr.storage as storage
import atr.tasks.message as message
import atr.util as util
@@ -170,12 +171,12 @@ class CommitteeMember(CommitteeParticipant):
subject, _ = await
construct.announce_release_subject_and_body(subject_template, "", options)
# Prepare paths for file operations
- unfinished_revisions_path = util.release_directory_base(release)
+ unfinished_revisions_path = paths.release_directory_base(release)
unfinished_path = unfinished_revisions_path /
release.unwrap_revision_number
unfinished_dir = str(unfinished_path)
release_date = datetime.datetime.now(datetime.UTC)
predicted_finished_release =
self.__predicted_finished_release(release, release_date)
- finished_path = util.release_directory(predicted_finished_release)
+ finished_path = paths.release_directory(predicted_finished_release)
finished_dir = str(finished_path)
if await aiofiles.os.path.exists(finished_dir):
raise storage.AccessError("Release already exists")
@@ -258,7 +259,7 @@ class CommitteeMember(CommitteeParticipant):
) -> None:
"""Hard link the release files to the downloads directory."""
# TODO: Rename *_dir functions to _path functions
- downloads_base_path = util.get_downloads_dir()
+ downloads_base_path = paths.get_downloads_dir()
downloads_path = downloads_base_path / committee.name /
download_path_suffix.removeprefix("/")
# The "exist_ok" parameter means to overwrite files if True
# We only overwrite if we're not preserving, so we supply "not
preserve"
diff --git a/atr/storage/writers/keys.py b/atr/storage/writers/keys.py
index f2cbe783..4b3cff77 100644
--- a/atr/storage/writers/keys.py
+++ b/atr/storage/writers/keys.py
@@ -41,6 +41,7 @@ import atr.config as config
import atr.db as db
import atr.log as log
import atr.models.sql as sql
+import atr.paths as paths
import atr.storage as storage
import atr.storage.outcome as outcome
import atr.storage.types as types
@@ -416,7 +417,7 @@ class CommitteeParticipant(FoundationCommitter):
self,
) -> outcome.Outcome[str]:
try:
- base_downloads_dir = util.get_downloads_dir()
+ base_downloads_dir = paths.get_downloads_dir()
committee = await self.committee()
is_podling = committee.is_podling
@@ -470,7 +471,7 @@ class CommitteeParticipant(FoundationCommitter):
version=version_name,
_committee=True,
).demand(storage.AccessError(f"Release not found: {project_name}
{version_name}"))
- keys_path = util.release_directory(release) / "KEYS"
+ keys_path = paths.release_directory(release) / "KEYS"
async with aiofiles.open(keys_path, encoding="utf-8") as f:
keys_file_text = await f.read()
if release.committee is None:
diff --git a/atr/storage/writers/policy.py b/atr/storage/writers/policy.py
index 8c376388..a6ff2760 100644
--- a/atr/storage/writers/policy.py
+++ b/atr/storage/writers/policy.py
@@ -24,9 +24,9 @@ import strictyaml
import strictyaml.ruamel.error as error
import atr.db as db
+import atr.hashes as hashes
import atr.models as models
import atr.storage as storage
-import atr.util as util
if TYPE_CHECKING:
import atr.shared as shared
@@ -190,8 +190,8 @@ class CommitteeMember(CommitteeParticipant):
) -> None:
submitted_subject = submitted_subject.strip()
current_default_text = project.policy_announce_release_subject_default
- current_default_hash =
util.compute_sha3_256(current_default_text.encode())
- submitted_hash = util.compute_sha3_256(submitted_subject.encode())
+ current_default_hash =
hashes.compute_sha3_256(current_default_text.encode())
+ submitted_hash = hashes.compute_sha3_256(submitted_subject.encode())
if submitted_hash == current_default_hash:
release_policy.announce_release_subject = ""
@@ -206,8 +206,8 @@ class CommitteeMember(CommitteeParticipant):
) -> None:
submitted_template = submitted_template.replace("\r\n", "\n")
current_default_text = project.policy_announce_release_default
- current_default_hash =
util.compute_sha3_256(current_default_text.encode())
- submitted_hash = util.compute_sha3_256(submitted_template.encode())
+ current_default_hash =
hashes.compute_sha3_256(current_default_text.encode())
+ submitted_hash = hashes.compute_sha3_256(submitted_template.encode())
if submitted_hash == current_default_hash:
release_policy.announce_release_template = ""
@@ -235,8 +235,8 @@ class CommitteeMember(CommitteeParticipant):
) -> None:
submitted_subject = submitted_subject.strip()
current_default_text = project.policy_start_vote_subject_default
- current_default_hash =
util.compute_sha3_256(current_default_text.encode())
- submitted_hash = util.compute_sha3_256(submitted_subject.encode())
+ current_default_hash =
hashes.compute_sha3_256(current_default_text.encode())
+ submitted_hash = hashes.compute_sha3_256(submitted_subject.encode())
if submitted_hash == current_default_hash:
release_policy.start_vote_subject = ""
@@ -251,8 +251,8 @@ class CommitteeMember(CommitteeParticipant):
) -> None:
submitted_template = submitted_template.replace("\r\n", "\n")
current_default_text = project.policy_start_vote_default
- current_default_hash =
util.compute_sha3_256(current_default_text.encode())
- submitted_hash = util.compute_sha3_256(submitted_template.encode())
+ current_default_hash =
hashes.compute_sha3_256(current_default_text.encode())
+ submitted_hash = hashes.compute_sha3_256(submitted_template.encode())
if submitted_hash == current_default_hash:
release_policy.start_vote_template = ""
diff --git a/atr/storage/writers/release.py b/atr/storage/writers/release.py
index d05b80d4..411bd642 100644
--- a/atr/storage/writers/release.py
+++ b/atr/storage/writers/release.py
@@ -36,6 +36,7 @@ import atr.form as form
import atr.log as log
import atr.models.api as api
import atr.models.sql as sql
+import atr.paths as paths
import atr.storage as storage
import atr.storage.types as types
import atr.util as util
@@ -104,8 +105,8 @@ class CommitteeParticipant(FoundationCommitter):
project_name=project_name, version=version, phase=phase,
_committee=True
).demand(storage.AccessError(f"Release '{project_name} {version}' not
found."))
release_dirs = [
- util.release_directory_base(release),
- util.get_attestable_dir() / project_name / version,
+ paths.release_directory_base(release),
+ paths.get_attestable_dir() / project_name / version,
]
# Delete from the database using bulk SQL DELETE for efficiency
@@ -527,7 +528,7 @@ class CommitteeParticipant(FoundationCommitter):
async def __delete_release_data_downloads(self, release: sql.Release) ->
None:
# Delete hard links from the downloads directory
- finished_dir = util.release_directory(release)
+ finished_dir = paths.release_directory(release)
if await aiofiles.os.path.isdir(finished_dir):
release_inodes = set()
async for file_path in util.paths_recursive(finished_dir):
@@ -538,7 +539,7 @@ class CommitteeParticipant(FoundationCommitter):
continue
if release_inodes:
- downloads_dir = util.get_downloads_dir()
+ downloads_dir = paths.get_downloads_dir()
async for link_path in util.paths_recursive(downloads_dir):
full_link_path = downloads_dir / link_path
try:
diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py
index 16d1f00b..9e7bbbd0 100644
--- a/atr/storage/writers/revision.py
+++ b/atr/storage/writers/revision.py
@@ -36,6 +36,7 @@ import atr.db.interaction as interaction
import atr.detection as detection
import atr.merge as merge
import atr.models.sql as sql
+import atr.paths as paths
import atr.storage as storage
import atr.storage.types as types
import atr.tasks as tasks
@@ -136,16 +137,16 @@ class CommitteeParticipant(FoundationCommitter):
release.check_cache_key = None
if clone_from is not None:
- old_release_dir = util.release_directory_base(release) / clone_from
+ old_release_dir = paths.release_directory_base(release) /
clone_from
else:
- old_release_dir = util.release_directory(release)
+ old_release_dir = paths.release_directory(release)
merge_enabled = clone_from is None
# Create a temporary directory
# We ensure, below, that it's removed on any exception
# Use the tmp subdirectory of state, to ensure that it is on the same
filesystem
prefix_token = secrets.token_hex(16)
- temp_dir: str = await asyncio.to_thread(tempfile.mkdtemp,
prefix=prefix_token + "-", dir=util.get_tmp_dir())
+ temp_dir: str = await asyncio.to_thread(tempfile.mkdtemp,
prefix=prefix_token + "-", dir=paths.get_tmp_dir())
temp_dir_path = pathlib.Path(temp_dir)
try:
@@ -233,7 +234,7 @@ class CommitteeParticipant(FoundationCommitter):
and (prior_name != old_revision.name)
):
prior_number = prior_name.split()[-1]
- prior_dir = util.release_directory_base(release) /
prior_number
+ prior_dir = paths.release_directory_base(release) /
prior_number
await merge.merge(
base_inodes,
base_hashes,
@@ -250,7 +251,7 @@ class CommitteeParticipant(FoundationCommitter):
# Rename the directory to the new revision number
await data.refresh(release)
- new_revision_dir = util.release_directory(release)
+ new_revision_dir = paths.release_directory(release)
# Ensure that the parent directory exists
await aiofiles.os.makedirs(new_revision_dir.parent,
exist_ok=True)
diff --git a/atr/tasks/__init__.py b/atr/tasks/__init__.py
index 282db9cf..6c4a47f0 100644
--- a/atr/tasks/__init__.py
+++ b/atr/tasks/__init__.py
@@ -30,6 +30,7 @@ import atr.db as db
import atr.hashes as hashes
import atr.models.results as results
import atr.models.sql as sql
+import atr.paths as file_paths
import atr.tasks.checks as checks
import atr.tasks.checks.compare as compare
import atr.tasks.checks.hashing as hashing
@@ -134,7 +135,7 @@ async def draft_checks(
"""Core logic to analyse a draft revision and queue checks."""
# Construct path to the specific revision
# We don't have the release object here, so we can't use
util.release_directory
- revision_path = util.get_unfinished_dir() / project_name / release_version
/ revision_number
+ revision_path = file_paths.get_unfinished_dir() / project_name /
release_version / revision_number
relative_paths = [path async for path in
util.paths_recursive(revision_path)]
async with db.ensure_session(caller_data) as data:
diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py
index 4bdcf40b..e5c0e25f 100644
--- a/atr/tasks/checks/__init__.py
+++ b/atr/tasks/checks/__init__.py
@@ -36,11 +36,11 @@ if TYPE_CHECKING:
import atr.attestable as attestable
import atr.db as db
-import atr.file_paths as file_paths
import atr.hashes as hashes
import atr.log as log
import atr.models.github as github_models
import atr.models.sql as sql
+import atr.paths as paths
import atr.util as util
@@ -179,7 +179,7 @@ class Recorder:
return self.abs_path_base() / rel_path_part
def abs_path_base(self) -> pathlib.Path:
- return file_paths.base_path_for_revision(self.project_name,
self.version_name, self.revision_number)
+ return paths.base_path_for_revision(self.project_name,
self.version_name, self.revision_number)
async def project(self) -> sql.Project:
# TODO: Cache project
@@ -335,7 +335,7 @@ async def resolve_cache_key(
policy = release.release_policy or release.project.release_policy
if not ignore_path:
if path is None:
- path = file_paths.revision_path_for_file(release.project_name,
release.version, revision, file or "")
+ path = paths.revision_path_for_file(release.project_name,
release.version, revision, file or "")
file_hash = await hashes.compute_file_hash(path)
if file_hash:
cache_key["file_hash"] = file_hash
@@ -379,9 +379,7 @@ async def _resolve_all_files(release: sql.Release,
rel_path: str | None = None)
if not release.latest_revision_number:
return []
if not (
- base_path := file_paths.base_path_for_revision(
- release.project_name, release.version,
release.latest_revision_number
- )
+ base_path := paths.base_path_for_revision(release.project_name,
release.version, release.latest_revision_number)
):
return []
@@ -430,7 +428,7 @@ async def _resolve_is_podling(release: sql.Release,
rel_path: str | None = None)
async def _resolve_unsuffixed_file_hash(release: sql.Release, rel_path: str |
None = None) -> str:
if (not rel_path) or (not release.latest_revision_number):
return ""
- abs_path = file_paths.revision_path_for_file(
+ abs_path = paths.revision_path_for_file(
release.project_name, release.version, release.latest_revision_number,
rel_path
)
plain_path = abs_path.with_suffix("")
diff --git a/atr/tasks/checks/compare.py b/atr/tasks/checks/compare.py
index 72905a2d..013f7839 100644
--- a/atr/tasks/checks/compare.py
+++ b/atr/tasks/checks/compare.py
@@ -42,6 +42,7 @@ import atr.config as config
import atr.log as log
import atr.models.github as github_models
import atr.models.results as results
+import atr.paths as paths
import atr.tasks.checks as checks
import atr.util as util
@@ -102,7 +103,7 @@ async def source_trees(args: checks.FunctionArguments) ->
results.Results | None
return None
max_extract_size = args.extra_args.get("max_extract_size",
_CONFIG.MAX_EXTRACT_SIZE)
chunk_size = args.extra_args.get("chunk_size",
_CONFIG.EXTRACT_CHUNK_SIZE)
- tmp_dir = util.get_tmp_dir()
+ tmp_dir = paths.get_tmp_dir()
await aiofiles.os.makedirs(tmp_dir, exist_ok=True)
async with util.async_temporary_directory(prefix="trees-",
dir=tmp_dir) as temp_dir:
github_dir = temp_dir / "github"
diff --git a/atr/tasks/sbom.py b/atr/tasks/sbom.py
index 7c60d88d..5a6539f0 100644
--- a/atr/tasks/sbom.py
+++ b/atr/tasks/sbom.py
@@ -30,6 +30,7 @@ import atr.log as log
import atr.models.results as results
import atr.models.schema as schema
import atr.models.sql as sql
+import atr.paths as paths
import atr.sbom as sbom
import atr.storage as storage
import atr.tasks.checks as checks
@@ -81,7 +82,7 @@ class ScoreArgs(FileArgs):
@checks.with_model(FileArgs)
async def augment(args: FileArgs) -> results.Results | None:
- base_dir = util.get_unfinished_dir() / args.project_name /
args.version_name / args.revision_number
+ base_dir = paths.get_unfinished_dir() / args.project_name /
args.version_name / args.revision_number
if not await aiofiles.os.path.isdir(base_dir):
raise SBOMScoringError("Revision directory does not exist",
{"base_dir": str(base_dir)})
full_path = base_dir / args.file_path
@@ -141,7 +142,7 @@ async def generate_cyclonedx(args: GenerateCycloneDX) ->
results.Results | None:
@checks.with_model(FileArgs)
async def osv_scan(args: FileArgs) -> results.Results | None:
- base_dir = util.get_unfinished_dir() / args.project_name /
args.version_name / args.revision_number
+ base_dir = paths.get_unfinished_dir() / args.project_name /
args.version_name / args.revision_number
if not await aiofiles.os.path.isdir(base_dir):
raise SBOMScanningError("Revision directory does not exist",
{"base_dir": str(base_dir)})
full_path = base_dir / args.file_path
@@ -198,7 +199,7 @@ async def osv_scan(args: FileArgs) -> results.Results |
None:
@checks.with_model(FileArgs)
async def score_qs(args: FileArgs) -> results.Results | None:
- base_dir = util.get_unfinished_dir() / args.project_name /
args.version_name / args.revision_number
+ base_dir = paths.get_unfinished_dir() / args.project_name /
args.version_name / args.revision_number
if not await aiofiles.os.path.isdir(base_dir):
raise SBOMScoringError("Revision directory does not exist",
{"base_dir": str(base_dir)})
full_path = base_dir / args.file_path
@@ -234,10 +235,10 @@ async def score_qs(args: FileArgs) -> results.Results |
None:
@checks.with_model(ScoreArgs)
async def score_tool(args: ScoreArgs) -> results.Results | None:
- base_dir = util.get_unfinished_dir() / args.project_name /
args.version_name / args.revision_number
+ base_dir = paths.get_unfinished_dir() / args.project_name /
args.version_name / args.revision_number
previous_base_dir = None
if args.previous_release_version is not None:
- previous_base_dir = util.get_finished_dir() / args.project_name /
args.previous_release_version
+ previous_base_dir = paths.get_finished_dir() / args.project_name /
args.previous_release_version
if not await aiofiles.os.path.isdir(base_dir):
raise SBOMScoringError("Revision directory does not exist",
{"base_dir": str(base_dir)})
full_path = base_dir / args.file_path
diff --git a/atr/util.py b/atr/util.py
index e412f74c..6e55f577 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -58,6 +58,7 @@ import atr.models.validation as validation
import atr.registry as registry
import atr.tarzip as tarzip
import atr.user as user
+from atr.paths import release_directory, release_directory_revision
ARCHIVE_ROOT_SUFFIXES: Final[tuple[str, ...]] = ("-source", "-src")
DIRECTORY_PERMISSIONS: Final[int] = 0o755
@@ -246,20 +247,6 @@ def committee_is_standing(committee_name: str) -> bool:
return committee_name in registry.STANDING_COMMITTEES
-def compute_sha3_256(file_data: bytes) -> str:
- """Compute SHA3-256 hash of file data."""
- return hashlib.sha3_256(file_data).hexdigest()
-
-
-async def compute_sha512(file_path: pathlib.Path) -> str:
- """Compute SHA-512 hash of a file."""
- sha512 = hashlib.sha512()
- async with aiofiles.open(file_path, "rb") as f:
- while chunk := await f.read(4096):
- sha512.update(chunk)
- return sha512.hexdigest()
-
-
async def content_list(
phase_subdir: pathlib.Path, project_name: str, version_name: str,
revision_name: str | None = None
) -> AsyncGenerator[FileStat]:
@@ -449,15 +436,6 @@ async def email_to_uid_map() -> dict[str, str]:
return email_to_uid
-async def file_sha3(path: str) -> str:
- """Compute SHA3-256 hash of a file."""
- sha3 = hashlib.sha3_256()
- async with aiofiles.open(path, "rb") as f:
- while chunk := await f.read(4096):
- sha3.update(chunk)
- return sha3.hexdigest()
-
-
def format_datetime(dt_obj: datetime.datetime | int) -> str:
"""Format a datetime object or Unix timestamp into a human readable
datetime string."""
# Integers are unix timestamps
@@ -542,22 +520,6 @@ async def get_asf_id_or_die() -> str:
return web_session.uid
-def get_attestable_dir() -> pathlib.Path:
- return pathlib.Path(config.get().ATTESTABLE_STORAGE_DIR)
-
-
-def get_downloads_dir() -> pathlib.Path:
- return pathlib.Path(config.get().DOWNLOADS_STORAGE_DIR)
-
-
-def get_finished_dir() -> pathlib.Path:
- return pathlib.Path(config.get().FINISHED_STORAGE_DIR)
-
-
-def get_quarantined_dir() -> pathlib.Path:
- return pathlib.Path(config.get().STATE_DIR) / "quarantined"
-
-
async def get_release_stats(release: sql.Release) -> tuple[int, int, str]:
"""Calculate file count, total byte size, and formatted size for a
release."""
base_dir = release_directory(release)
@@ -580,21 +542,6 @@ async def get_release_stats(release: sql.Release) ->
tuple[int, int, str]:
return count, total_bytes, formatted_size
-def get_tmp_dir() -> pathlib.Path:
- # This must be on the same filesystem as the other state subdirectories
- return pathlib.Path(config.get().STATE_DIR) / "temporary"
-
-
-def get_unfinished_dir() -> pathlib.Path:
- return pathlib.Path(config.get().UNFINISHED_STORAGE_DIR)
-
-
-def get_upload_staging_dir(session_token: str) -> pathlib.Path:
- if not session_token.isalnum():
- raise ValueError("Invalid session token")
- return get_tmp_dir() / "upload-staging" / session_token
-
-
async def get_urls_as_completed(urls: Sequence[str]) ->
AsyncGenerator[tuple[str, int | str | None, bytes]]:
"""GET a list of URLs in parallel and yield (url, status, content_bytes)
as they become available."""
async with create_secure_session() as session:
@@ -893,13 +840,6 @@ def plural(count: int, singular: str, plural_form: str |
None = None, *, include
return word
-def quarantine_directory(quarantined: sql.Quarantined) -> pathlib.Path:
- if not quarantined.token.isalnum():
- raise ValueError("Invalid quarantine token")
- release = quarantined.release
- return get_quarantined_dir() / release.project_name / release.version /
quarantined.token
-
-
async def read_file_for_viewer(full_path: pathlib.Path, max_size: int) ->
tuple[str | None, bool, bool, str | None]:
"""Read file content for viewer."""
content: str | None = None
@@ -943,70 +883,6 @@ async def read_file_for_viewer(full_path: pathlib.Path,
max_size: int) -> tuple[
return content, is_text, is_truncated, error_message
-def release_directory(release: sql.Release) -> pathlib.Path:
- """Return the absolute path to the directory containing the active files
for a given release phase."""
- latest_revision_number = release.latest_revision_number
- if (release.phase == sql.ReleasePhase.RELEASE) or (latest_revision_number
is None):
- return release_directory_base(release)
- return release_directory_base(release) / latest_revision_number
-
-
-def release_directory_base(release: sql.Release) -> pathlib.Path:
- """Determine the filesystem directory for a given release based on its
phase."""
- phase = release.phase
- project_name = release.project.name
- version_name = release.version
-
- base_dir: pathlib.Path | None = None
- match phase:
- case sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT:
- base_dir = get_unfinished_dir()
- case sql.ReleasePhase.RELEASE_CANDIDATE:
- base_dir = get_unfinished_dir()
- case sql.ReleasePhase.RELEASE_PREVIEW:
- base_dir = get_unfinished_dir()
- case sql.ReleasePhase.RELEASE:
- base_dir = get_finished_dir()
- # Do not add "case _" here
- return base_dir / project_name / version_name
-
-
-def release_directory_revision(release: sql.Release) -> pathlib.Path | None:
- """Return the path to the directory containing the active files for a
given release phase."""
- path_project = release.project.name
- path_version = release.version
- match release.phase:
- case (
- sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
- | sql.ReleasePhase.RELEASE_CANDIDATE
- | sql.ReleasePhase.RELEASE_PREVIEW
- ):
- if (path_revision := release.latest_revision_number) is None:
- return None
- path = get_unfinished_dir() / path_project / path_version /
path_revision
- case sql.ReleasePhase.RELEASE:
- path = get_finished_dir() / path_project / path_version
- # Do not add "case _" here
- return path
-
-
-def release_directory_version(release: sql.Release) -> pathlib.Path:
- """Return the path to the directory containing the active files for a
given release phase."""
- path_project = release.project.name
- path_version = release.version
- match release.phase:
- case (
- sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
- | sql.ReleasePhase.RELEASE_CANDIDATE
- | sql.ReleasePhase.RELEASE_PREVIEW
- ):
- path = get_unfinished_dir() / path_project / path_version
- case sql.ReleasePhase.RELEASE:
- path = get_finished_dir() / path_project / path_version
- # Do not add "case _" here
- return path
-
-
async def session_cache_read() -> dict[str, dict]:
cache_path = pathlib.Path(config.get().STATE_DIR) / "cache" /
"user_session_cache.json"
try:
diff --git a/atr/validate.py b/atr/validate.py
index 760dfc56..7688f823 100644
--- a/atr/validate.py
+++ b/atr/validate.py
@@ -23,7 +23,7 @@ from typing import NamedTuple, TypeVar
import atr.db as db
import atr.models.sql as sql
-import atr.util as util
+import atr.paths as paths
class Divergence(NamedTuple):
@@ -330,7 +330,7 @@ def release_name(r: sql.Release) -> Divergences:
@release_components("Release")
def release_on_disk(r: sql.Release) -> Divergences:
"""Check that the release is on disk."""
- path = util.release_directory(r)
+ path = paths.release_directory(r)
def okay(p: pathlib.Path) -> bool:
# The release directory must exist and contain at least one entry
diff --git a/tests/unit/test_paths.py b/tests/unit/test_paths.py
new file mode 100644
index 00000000..0c36c46b
--- /dev/null
+++ b/tests/unit/test_paths.py
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pathlib
+import types
+
+import pytest
+
+import atr.paths as paths
+
+
+def test_get_quarantined_dir_uses_state_dir(monkeypatch, tmp_path:
pathlib.Path):
+ mock_config = types.SimpleNamespace(STATE_DIR=str(tmp_path))
+ monkeypatch.setattr("atr.config.get", lambda: mock_config)
+ assert paths.get_quarantined_dir() == tmp_path / "quarantined"
+
+
+def test_quarantine_directory_builds_deterministic_path(monkeypatch, tmp_path:
pathlib.Path):
+ mock_config = types.SimpleNamespace(STATE_DIR=str(tmp_path))
+ monkeypatch.setattr("atr.config.get", lambda: mock_config)
+ mock_release = types.SimpleNamespace(project_name="example",
version="1.2.3")
+ quarantined = types.SimpleNamespace(release=mock_release,
token="0123456789abcdef")
+ assert (
+ paths.quarantine_directory(quarantined) == tmp_path / "quarantined" /
"example" / "1.2.3" / "0123456789abcdef"
+ )
+
+
+def test_quarantine_directory_rejects_non_alnum_token():
+ quarantined = types.SimpleNamespace(token="../escape")
+ with pytest.raises(ValueError, match="Invalid quarantine token"):
+ paths.quarantine_directory(quarantined)
diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py
index aec4aaac..20ff6505 100644
--- a/tests/unit/test_util.py
+++ b/tests/unit/test_util.py
@@ -19,9 +19,6 @@ import json
import os
import pathlib
import stat
-import types
-
-import pytest
import atr.util as util
@@ -95,12 +92,6 @@ def test_chmod_files_sets_default_permissions(tmp_path:
pathlib.Path):
assert file_mode == 0o444
-def test_get_quarantined_dir_uses_state_dir(monkeypatch, tmp_path:
pathlib.Path):
- mock_config = types.SimpleNamespace(STATE_DIR=str(tmp_path))
- monkeypatch.setattr("atr.config.get", lambda: mock_config)
- assert util.get_quarantined_dir() == tmp_path / "quarantined"
-
-
def test_json_for_script_element_escapes_correctly():
payload = ["example.txt", "</script><script>alert(1)</script>",
"apple&banana"]
@@ -111,17 +102,3 @@ def test_json_for_script_element_escapes_correctly():
assert "apple&banana" not in serialized
assert "apple\\u0026banana" in serialized
assert json.loads(serialized) == payload
-
-
-def test_quarantine_directory_builds_deterministic_path(monkeypatch, tmp_path:
pathlib.Path):
- mock_config = types.SimpleNamespace(STATE_DIR=str(tmp_path))
- monkeypatch.setattr("atr.config.get", lambda: mock_config)
- mock_release = types.SimpleNamespace(project_name="example",
version="1.2.3")
- quarantined = types.SimpleNamespace(release=mock_release,
token="0123456789abcdef")
- assert util.quarantine_directory(quarantined) == tmp_path / "quarantined"
/ "example" / "1.2.3" / "0123456789abcdef"
-
-
-def test_quarantine_directory_rejects_non_alnum_token():
- quarantined = types.SimpleNamespace(token="../escape")
- with pytest.raises(ValueError, match="Invalid quarantine token"):
- util.quarantine_directory(quarantined)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]