This is an automated email from the ASF dual-hosted git repository. arm pushed a commit to branch arm in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
commit 039f5889ae2b32710e83d6c9eb59edb2f5a227db Author: Alastair McFarlane <[email protected]> AuthorDate: Tue Feb 24 11:56:55 2026 +0000 #641 - some initial migrations out of the util module for paths and hash calculation. --- atr/admin/__init__.py | 7 +- atr/api/__init__.py | 16 +++-- atr/attestable.py | 11 ++-- atr/construct.py | 5 +- atr/file_paths.py | 28 -------- atr/get/checks.py | 31 ++++----- atr/get/download.py | 7 +- atr/get/draft.py | 3 +- atr/get/file.py | 7 +- atr/get/finish.py | 3 +- atr/get/published.py | 3 +- atr/get/report.py | 3 +- atr/get/revisions.py | 3 +- atr/get/test.py | 3 +- atr/get/voting.py | 5 +- atr/hashes.py | 25 ++++++- atr/paths.py | 131 +++++++++++++++++++++++++++++++++++++ atr/post/upload.py | 5 +- atr/server.py | 15 +++-- atr/shared/web.py | 15 +++-- atr/ssh.py | 3 +- atr/storage/readers/releases.py | 9 +-- atr/storage/writers/announce.py | 7 +- atr/storage/writers/keys.py | 5 +- atr/storage/writers/policy.py | 18 ++--- atr/storage/writers/release.py | 9 +-- atr/storage/writers/revision.py | 11 ++-- atr/tasks/__init__.py | 3 +- atr/tasks/checks/__init__.py | 2 +- atr/tasks/checks/compare.py | 3 +- atr/tasks/sbom.py | 11 ++-- atr/util.py | 126 +---------------------------------- atr/validate.py | 4 +- tests/unit/test_checks_compare.py | 16 ++--- tests/unit/test_create_revision.py | 8 +-- tests/unit/test_paths.py | 45 +++++++++++++ tests/unit/test_util.py | 23 ------- 37 files changed, 340 insertions(+), 289 deletions(-) diff --git a/atr/admin/__init__.py b/atr/admin/__init__.py index 27dd52d2..0a97d2f8 100644 --- a/atr/admin/__init__.py +++ b/atr/admin/__init__.py @@ -49,6 +49,7 @@ import atr.log as log import atr.mapping as mapping import atr.models.session import atr.models.sql as sql +import atr.paths as paths import atr.principal as principal import atr.storage as storage import atr.storage.outcome as outcome @@ -205,7 +206,7 @@ async def consistency(session: web.Committer) -> web.TextResponse: releases = await data.release().all() database_dirs = [] for release in releases: - path = util.release_directory_version(release) + path = paths.release_directory_version(release) database_dirs.append(str(path)) if len(set(database_dirs)) != len(database_dirs): raise base.ASFQuartException("Duplicate release directories in database", errorcode=500) @@ -1092,7 +1093,7 @@ async def _get_filesystem_dirs() -> list[str]: async def _get_filesystem_dirs_finished(filesystem_dirs: list[str]) -> None: - finished_dir = util.get_finished_dir() + finished_dir = paths.get_finished_dir() finished_dir_contents = await aiofiles.os.listdir(finished_dir) for project_dir in finished_dir_contents: project_dir_path = os.path.join(finished_dir, project_dir) @@ -1105,7 +1106,7 @@ async def _get_filesystem_dirs_finished(filesystem_dirs: list[str]) -> None: async def _get_filesystem_dirs_unfinished(filesystem_dirs: list[str]) -> None: - unfinished_dir = util.get_unfinished_dir() + unfinished_dir = paths.get_unfinished_dir() unfinished_dir_contents = await aiofiles.os.listdir(unfinished_dir) for project_dir in unfinished_dir_contents: project_dir_path = os.path.join(unfinished_dir, project_dir) diff --git a/atr/api/__init__.py b/atr/api/__init__.py index 432af5f8..7e2c6117 100644 --- a/atr/api/__init__.py +++ b/atr/api/__init__.py @@ -33,10 +33,12 @@ import atr.blueprints.api as api import atr.config as config import atr.db as db import atr.db.interaction as interaction +import atr.hashes as hashes import atr.jwtoken as jwtoken import atr.log as log import atr.models as models import atr.models.sql as sql +import atr.paths as paths import atr.principal as principal import atr.storage as storage import atr.storage.outcome as outcome @@ -949,10 +951,10 @@ async def release_paths(project: str, version: str, revision: str | None = None) release_name = sql.release_name(project, version) release = await data.release(name=release_name).demand(exceptions.NotFound()) if revision is None: - dir_path = util.release_directory(release) + dir_path = paths.release_directory(release) else: await data.revision(release_name=release_name, number=revision).demand(exceptions.NotFound()) - dir_path = util.release_directory_version(release) / revision + dir_path = paths.release_directory_version(release) / revision if not (await aiofiles.os.path.isdir(dir_path)): raise exceptions.NotFound("Files not found") files: list[str] = [str(path) for path in [p async for p in util.paths_recursive(dir_path)]] @@ -1086,14 +1088,14 @@ async def signature_provenance(data: models.api.SignatureProvenanceArgs) -> Dict ) ) - downloads_dir = util.get_downloads_dir() - matched_committee_names = await _match_committee_names(key.committees, util.get_finished_dir(), data) + downloads_dir = paths.get_downloads_dir() + matched_committee_names = await _match_committee_names(key.committees, paths.get_finished_dir(), data) for matched_committee_name in matched_committee_names: keys_file_path = downloads_dir / matched_committee_name / "KEYS" async with aiofiles.open(keys_file_path, "rb") as f: keys_file_data = await f.read() - keys_file_sha3_256 = hashlib.sha3_256(keys_file_data).hexdigest() + keys_file_sha3_256 = hashes.compute_sha3_256(keys_file_data) signing_keys.append( models.api.SignatureProvenanceKey( committee=matched_committee_name, @@ -1422,7 +1424,7 @@ async def _match_committee_names( key_committees: list[sql.Committee], finished_dir: pathlib.Path, data: models.api.SignatureProvenanceArgs ) -> set[str]: key_committee_names = set(committee.name for committee in key_committees) - finished_dir = util.get_finished_dir() + finished_dir = paths.get_finished_dir() matched_committee_names = set() # Check for finished files @@ -1446,7 +1448,7 @@ async def _match_committee_names( projects = await db_data.project(committee_name=key_committee_name).all() for project in projects: releases = await db_data.release(project_name=project.name).all() - release_directories.extend(util.release_directory(release) for release in releases) + release_directories.extend(paths.release_directory(release) for release in releases) for release_directory in release_directories: if await _match_unfinished(release_directory, data): matched_committee_names.add(key_committee_name) diff --git a/atr/attestable.py b/atr/attestable.py index 91a65470..eedf927d 100644 --- a/atr/attestable.py +++ b/atr/attestable.py @@ -27,6 +27,7 @@ import pydantic import atr.hashes as hashes import atr.log as log import atr.models.attestable as models +import atr.paths as paths import atr.util as util if TYPE_CHECKING: @@ -34,19 +35,19 @@ if TYPE_CHECKING: def attestable_checks_path(project_name: str, version_name: str, revision_number: str) -> pathlib.Path: - return util.get_attestable_dir() / project_name / version_name / f"{revision_number}.checks.json" + return paths.get_attestable_dir() / project_name / version_name / f"{revision_number}.checks.json" def attestable_path(project_name: str, version_name: str, revision_number: str) -> pathlib.Path: - return util.get_attestable_dir() / project_name / version_name / f"{revision_number}.json" + return paths.get_attestable_dir() / project_name / version_name / f"{revision_number}.json" def attestable_paths_path(project_name: str, version_name: str, revision_number: str) -> pathlib.Path: - return util.get_attestable_dir() / project_name / version_name / f"{revision_number}.paths.json" + return paths.get_attestable_dir() / project_name / version_name / f"{revision_number}.paths.json" def github_tp_payload_path(project_name: str, version_name: str, revision_number: str) -> pathlib.Path: - return util.get_attestable_dir() / project_name / version_name / f"{revision_number}.github-tp.json" + return paths.get_attestable_dir() / project_name / version_name / f"{revision_number}.github-tp.json" async def github_tp_payload_write( @@ -116,7 +117,7 @@ async def load_paths( def migrate_to_paths_files() -> int: - attestable_dir = util.get_attestable_dir() + attestable_dir = paths.get_attestable_dir() if not attestable_dir.is_dir(): return 0 count = 0 diff --git a/atr/construct.py b/atr/construct.py index 55e8a689..f36ba1a6 100644 --- a/atr/construct.py +++ b/atr/construct.py @@ -26,6 +26,7 @@ import quart import atr.config as config import atr.db as db import atr.models.sql as sql +import atr.paths as paths import atr.util as util type Context = Literal["announce", "announce_subject", "checklist", "vote", "vote_subject"] @@ -227,11 +228,11 @@ async def start_vote_subject_and_body(subject: str, body: str, options: StartVot # Therefore there is no route handler, so we have to construct the URL manually keys_file = None if committee.is_podling: - keys_file_path = util.get_downloads_dir() / "incubator" / committee.name / "KEYS" + keys_file_path = paths.get_downloads_dir() / "incubator" / committee.name / "KEYS" if await aiofiles.os.path.isfile(keys_file_path): keys_file = f"https://{host}/downloads/incubator/{committee.name}/KEYS" else: - keys_file_path = util.get_downloads_dir() / committee.name / "KEYS" + keys_file_path = paths.get_downloads_dir() / committee.name / "KEYS" if await aiofiles.os.path.isfile(keys_file_path): keys_file = f"https://{host}/downloads/{committee.name}/KEYS" diff --git a/atr/file_paths.py b/atr/file_paths.py deleted file mode 100644 index d29d6b96..00000000 --- a/atr/file_paths.py +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pathlib - -import atr.util as util - - -def base_path_for_revision(project_name: str, version_name: str, revision: str) -> pathlib.Path: - return pathlib.Path(util.get_unfinished_dir(), project_name, version_name, revision) - - -def revision_path_for_file(project_name: str, version_name: str, revision: str, file_name: str) -> pathlib.Path: - return base_path_for_revision(project_name, version_name, revision) / file_name diff --git a/atr/get/checks.py b/atr/get/checks.py index 36b7b4dc..8faa5ce2 100644 --- a/atr/get/checks.py +++ b/atr/get/checks.py @@ -34,6 +34,7 @@ import atr.get.sbom as sbom import atr.get.vote as vote import atr.htm as htm import atr.models.sql as sql +import atr.paths as paths import atr.post as post import atr.render as render import atr.shared as shared @@ -85,14 +86,14 @@ class FileStats(NamedTuple): async def get_file_totals(release: sql.Release, session: web.Committer | None) -> FileStats: """Get file level check totals after ignores are applied.""" - base_path = util.release_directory(release) - paths = [path async for path in util.paths_recursive(base_path)] + base_path = paths.release_directory(release) + all_paths = [path async for path in util.paths_recursive(base_path)] async with storage.read(session) as read: ragp = read.as_general_public() match_ignore = await ragp.checks.ignores_matcher(release.project_name) - _, totals = await _compute_stats(release, paths, match_ignore) + _, totals = await _compute_stats(release, all_paths, match_ignore) return totals @@ -110,22 +111,22 @@ async def selected(session: web.Committer | None, project_name: str, version_nam if release.committee is None: raise ValueError("Release has no committee") - base_path = util.release_directory(release) - paths = [path async for path in util.paths_recursive(base_path)] - paths.sort() + base_path = paths.release_directory(release) + all_paths = [path async for path in util.paths_recursive(base_path)] + all_paths.sort() async with storage.read(session) as read: ragp = read.as_general_public() match_ignore = await ragp.checks.ignores_matcher(release.project_name) - per_file_stats, totals = await _compute_stats(release, paths, match_ignore) + per_file_stats, totals = await _compute_stats(release, all_paths, match_ignore) page = htm.Block() _render_header(page, release) - _render_summary(page, totals, paths, per_file_stats) - _render_checks_table(page, release, paths, per_file_stats) + _render_summary(page, totals, all_paths, per_file_stats) + _render_checks_table(page, release, all_paths, per_file_stats) _render_ignores_section(page, release) - _render_debug_table(page, paths, per_file_stats) + _render_debug_table(page, all_paths, per_file_stats) return await template.blank( f"File checks for {release.project.short_display_name} {release.version}", @@ -150,13 +151,13 @@ async def selected_revision( _project_release_policy=True, ).demand(base.ASFQuartException("Release does not exist", errorcode=404)) - base_path = util.release_directory(release) - paths = [path async for path in util.paths_recursive(base_path)] - paths.sort() + base_path = paths.release_directory(release) + all_paths = [path async for path in util.paths_recursive(base_path)] + all_paths.sort() async with storage.read(session) as read: ragp = read.as_general_public() - info = await ragp.releases.path_info(release, paths) + info = await ragp.releases.path_info(release, all_paths) ongoing_count = await interaction.tasks_ongoing(project_name, version_name, revision_number) @@ -165,7 +166,7 @@ async def selected_revision( delete_file_forms: dict[str, str] = {} if release.phase == sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT: - for path in paths: + for path in all_paths: delete_file_forms[str(path)] = str( form.render( model_cls=draft.DeleteFileForm, diff --git a/atr/get/download.py b/atr/get/download.py index 7a25aea9..bb8d5145 100644 --- a/atr/get/download.py +++ b/atr/get/download.py @@ -31,6 +31,7 @@ import atr.form as form import atr.htm as htm import atr.mapping as mapping import atr.models.sql as sql +import atr.paths as paths import atr.template as template import atr.util as util import atr.web as web @@ -116,7 +117,7 @@ async def zip_selected(session: web.Committer, project_name: str, version_name: except Exception as e: return web.TextResponse(f"Server error: {e}", status=500) - base_dir = util.release_directory(release) + base_dir = paths.release_directory(release) files_to_zip = [] try: async for rel_path in util.paths_recursive(base_dir): @@ -155,7 +156,7 @@ async def _download_or_list(project_name: str, version_name: str, file_path: str release = await data.release(project_name=project_name, version=version_name).demand( base.ASFQuartException("Release does not exist", errorcode=404) ) - full_path = util.release_directory(release) / validated_path + full_path = paths.release_directory(release) / validated_path if await aiofiles.os.path.isdir(full_path): return await _list(validated_path, full_path, project_name, version_name, str(validated_path)) @@ -175,7 +176,7 @@ async def _download_or_list(project_name: str, version_name: str, file_path: str async def _generate_file_url_list(release: sql.Release) -> str: - base_dir = util.release_directory(release) + base_dir = paths.release_directory(release) urls = [] async for rel_path in util.paths_recursive(base_dir): full_item_path = base_dir / rel_path diff --git a/atr/get/draft.py b/atr/get/draft.py index 763584a5..16a091ac 100644 --- a/atr/get/draft.py +++ b/atr/get/draft.py @@ -24,6 +24,7 @@ import asfquart.base as base import atr.blueprints.get as get import atr.form as form +import atr.paths as paths import atr.post as post import atr.shared as shared import atr.template as template @@ -41,7 +42,7 @@ async def tools(session: web.Committer, project_name: str, version_name: str, fi raise base.ASFQuartException("Invalid file path", errorcode=400) release = await session.release(project_name, version_name) - full_path = str(util.release_directory(release) / validated_path) + full_path = str(paths.release_directory(release) / validated_path) # Check that the file exists if not await aiofiles.os.path.exists(full_path): diff --git a/atr/get/file.py b/atr/get/file.py index b4f968f4..2c004b55 100644 --- a/atr/get/file.py +++ b/atr/get/file.py @@ -24,6 +24,7 @@ import atr.get.finish as finish import atr.get.vote as vote import atr.htm as htm import atr.models.sql as sql +import atr.paths as paths import atr.render as render import atr.template as template import atr.util as util @@ -42,11 +43,11 @@ async def selected(session: web.Committer, project_name: str, version_name: str) revision_number = release.latest_revision_number file_stats = [] if release.phase == sql.ReleasePhase.RELEASE: - file_stats = [stat async for stat in util.content_list(util.get_finished_dir(), project_name, version_name)] + file_stats = [stat async for stat in util.content_list(paths.get_finished_dir(), project_name, version_name)] elif revision_number is not None: file_stats = [ stat - async for stat in util.content_list(util.get_unfinished_dir(), project_name, version_name, revision_number) + async for stat in util.content_list(paths.get_unfinished_dir(), project_name, version_name, revision_number) ] else: raise ValueError("No revision number found for unfinished release") @@ -135,7 +136,7 @@ async def selected_path(session: web.Committer, project_name: str, version_name: release = await session.release(project_name, version_name, phase=None) _max_view_size = 512 * 1024 - full_path = util.release_directory(release) / validated_path + full_path = paths.release_directory(release) / validated_path content_listing = await util.archive_listing(full_path) content, is_text, is_truncated, error_message = await util.read_file_for_viewer(full_path, _max_view_size) diff --git a/atr/get/finish.py b/atr/get/finish.py index 39d36907..ac95b2c1 100644 --- a/atr/get/finish.py +++ b/atr/get/finish.py @@ -40,6 +40,7 @@ import atr.get.root as root import atr.htm as htm import atr.mapping as mapping import atr.models.sql as sql +import atr.paths as paths import atr.render as render import atr.shared as shared import atr.tasks.gha as gha @@ -178,7 +179,7 @@ async def _get_page_data( if release.phase != sql.ReleasePhase.RELEASE_PREVIEW: raise ValueError("Release is not in preview phase") - latest_revision_dir = util.release_directory(release) + latest_revision_dir = paths.release_directory(release) source_files_rel, target_dirs = await _sources_and_targets(latest_revision_dir) deletable_dirs = await _deletable_choices(latest_revision_dir, target_dirs) rc_analysis_result = await _analyse_rc_tags(latest_revision_dir) diff --git a/atr/get/published.py b/atr/get/published.py index cce3185d..fe6dacff 100644 --- a/atr/get/published.py +++ b/atr/get/published.py @@ -25,6 +25,7 @@ import quart import atr.blueprints.get as get import atr.form as form import atr.htm as htm +import atr.paths as paths import atr.util as util import atr.web as web @@ -105,7 +106,7 @@ async def _file_content(full_path: pathlib.Path) -> web.QuartResponse: async def _path(session: web.Committer, path: str) -> web.QuartResponse: - downloads_path = util.get_downloads_dir() + downloads_path = paths.get_downloads_dir() full_path = downloads_path / path if await aiofiles.os.path.isdir(full_path): return await _directory_listing(full_path, path) diff --git a/atr/get/report.py b/atr/get/report.py index c987755a..a3d194c9 100644 --- a/atr/get/report.py +++ b/atr/get/report.py @@ -23,6 +23,7 @@ import asfquart.base as base import atr.blueprints.get as get import atr.form as form import atr.models.sql as sql +import atr.paths as paths import atr.storage as storage import atr.template as template import atr.util as util @@ -57,7 +58,7 @@ async def selected_path(session: web.Committer, project_name: str, version_name: raise base.ASFQuartException("Release has no committee", errorcode=500) # TODO: When we do more than one thing in a dir, we should use the revision directory directly - abs_path = util.release_directory(release) / validated_path + abs_path = paths.release_directory(release) / validated_path if release.latest_revision_number is None: raise base.ASFQuartException("Release has no revision", errorcode=500) diff --git a/atr/get/revisions.py b/atr/get/revisions.py index aeb805bd..680227a1 100644 --- a/atr/get/revisions.py +++ b/atr/get/revisions.py @@ -33,6 +33,7 @@ import atr.get.root as root import atr.htm as htm import atr.models.schema as schema import atr.models.sql as sql +import atr.paths as paths import atr.post as post import atr.shared as shared import atr.template as template @@ -57,7 +58,7 @@ async def selected(session: web.Committer, project_name: str, version_name: str) except base.ASFQuartException: release = await session.release(project_name, version_name, phase=sql.ReleasePhase.RELEASE_PREVIEW) phase_key = "preview" - release_dir = util.release_directory_base(release) + release_dir = paths.release_directory_base(release) # Determine the current revision latest_revision_number = release.latest_revision_number diff --git a/atr/get/test.py b/atr/get/test.py index 73daa1b7..ad70cd64 100644 --- a/atr/get/test.py +++ b/atr/get/test.py @@ -31,6 +31,7 @@ import atr.get.vote as vote import atr.htm as htm import atr.models.session import atr.models.sql as sql +import atr.paths as paths import atr.shared as shared import atr.storage as storage import atr.template as template @@ -116,7 +117,7 @@ async def test_merge(session: web.Committer, project_name: str, version_name: st release = await data.release(name=release_name, _project=True).demand( RuntimeError("Release not found after merge test") ) - release_dir = util.release_directory(release) + release_dir = paths.release_directory(release) async for path in util.paths_recursive(release_dir): files.append(str(path)) diff --git a/atr/get/voting.py b/atr/get/voting.py index 83eea195..665cff66 100644 --- a/atr/get/voting.py +++ b/atr/get/voting.py @@ -29,6 +29,7 @@ import atr.get.keys as keys import atr.get.projects as projects import atr.htm as htm import atr.models.sql as sql +import atr.paths as paths import atr.post as post import atr.render as render import atr.shared as shared @@ -102,9 +103,9 @@ async def selected_revision( async def _check_keys_warning(committee: sql.Committee) -> bool: if committee.is_podling: - keys_file_path = util.get_downloads_dir() / "incubator" / committee.name / "KEYS" + keys_file_path = paths.get_downloads_dir() / "incubator" / committee.name / "KEYS" else: - keys_file_path = util.get_downloads_dir() / committee.name / "KEYS" + keys_file_path = paths.get_downloads_dir() / committee.name / "KEYS" return not await aiofiles.os.path.isfile(keys_file_path) diff --git a/atr/hashes.py b/atr/hashes.py index de29a760..35c5ac0c 100644 --- a/atr/hashes.py +++ b/atr/hashes.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - +import hashlib import pathlib from typing import Any, Final @@ -40,3 +40,26 @@ async def compute_file_hash(path: str | pathlib.Path) -> str: while chunk := await f.read(_HASH_CHUNK_SIZE): hasher.update(chunk) return f"blake3:{hasher.hexdigest()}" + + +def compute_sha3_256(file_data: bytes) -> str: + """Compute SHA3-256 hash of file data.""" + return hashlib.sha3_256(file_data).hexdigest() + + +async def compute_sha512(file_path: pathlib.Path) -> str: + """Compute SHA-512 hash of a file.""" + sha512 = hashlib.sha512() + async with aiofiles.open(file_path, "rb") as f: + while chunk := await f.read(4096): + sha512.update(chunk) + return sha512.hexdigest() + + +async def file_sha3(path: str) -> str: + """Compute SHA3-256 hash of a file.""" + sha3 = hashlib.sha3_256() + async with aiofiles.open(path, "rb") as f: + while chunk := await f.read(4096): + sha3.update(chunk) + return sha3.hexdigest() diff --git a/atr/paths.py b/atr/paths.py new file mode 100644 index 00000000..0bc556e0 --- /dev/null +++ b/atr/paths.py @@ -0,0 +1,131 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pathlib + +from atr import config as config +from atr.models import sql as sql + + +def base_path_for_revision(project_name: str, version_name: str, revision: str) -> pathlib.Path: + return pathlib.Path(get_unfinished_dir(), project_name, version_name, revision) + + +def revision_path_for_file(project_name: str, version_name: str, revision: str, file_name: str) -> pathlib.Path: + return base_path_for_revision(project_name, version_name, revision) / file_name + + +def get_attestable_dir() -> pathlib.Path: + return pathlib.Path(config.get().ATTESTABLE_STORAGE_DIR) + + +def get_downloads_dir() -> pathlib.Path: + return pathlib.Path(config.get().DOWNLOADS_STORAGE_DIR) + + +def get_finished_dir() -> pathlib.Path: + return pathlib.Path(config.get().FINISHED_STORAGE_DIR) + + +def get_quarantined_dir() -> pathlib.Path: + return pathlib.Path(config.get().STATE_DIR) / "quarantined" + + +def get_tmp_dir() -> pathlib.Path: + # This must be on the same filesystem as the other state subdirectories + return pathlib.Path(config.get().STATE_DIR) / "temporary" + + +def get_unfinished_dir() -> pathlib.Path: + return pathlib.Path(config.get().UNFINISHED_STORAGE_DIR) + + +def get_upload_staging_dir(session_token: str) -> pathlib.Path: + if not session_token.isalnum(): + raise ValueError("Invalid session token") + return get_tmp_dir() / "upload-staging" / session_token + + +def quarantine_directory(quarantined: sql.Quarantined) -> pathlib.Path: + if not quarantined.token.isalnum(): + raise ValueError("Invalid quarantine token") + release = quarantined.release + return get_quarantined_dir() / release.project_name / release.version / quarantined.token + + +def release_directory(release: sql.Release) -> pathlib.Path: + """Return the absolute path to the directory containing the active files for a given release phase.""" + latest_revision_number = release.latest_revision_number + if (release.phase == sql.ReleasePhase.RELEASE) or (latest_revision_number is None): + return release_directory_base(release) + return release_directory_base(release) / latest_revision_number + + +def release_directory_base(release: sql.Release) -> pathlib.Path: + """Determine the filesystem directory for a given release based on its phase.""" + phase = release.phase + project_name = release.project.name + version_name = release.version + + base_dir: pathlib.Path | None = None + match phase: + case sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT: + base_dir = get_unfinished_dir() + case sql.ReleasePhase.RELEASE_CANDIDATE: + base_dir = get_unfinished_dir() + case sql.ReleasePhase.RELEASE_PREVIEW: + base_dir = get_unfinished_dir() + case sql.ReleasePhase.RELEASE: + base_dir = get_finished_dir() + # Do not add "case _" here + return base_dir / project_name / version_name + + +def release_directory_revision(release: sql.Release) -> pathlib.Path | None: + """Return the path to the directory containing the active files for a given release phase.""" + path_project = release.project.name + path_version = release.version + match release.phase: + case ( + sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT + | sql.ReleasePhase.RELEASE_CANDIDATE + | sql.ReleasePhase.RELEASE_PREVIEW + ): + if (path_revision := release.latest_revision_number) is None: + return None + path = get_unfinished_dir() / path_project / path_version / path_revision + case sql.ReleasePhase.RELEASE: + path = get_finished_dir() / path_project / path_version + # Do not add "case _" here + return path + + +def release_directory_version(release: sql.Release) -> pathlib.Path: + """Return the path to the directory containing the active files for a given release phase.""" + path_project = release.project.name + path_version = release.version + match release.phase: + case ( + sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT + | sql.ReleasePhase.RELEASE_CANDIDATE + | sql.ReleasePhase.RELEASE_PREVIEW + ): + path = get_unfinished_dir() / path_project / path_version + case sql.ReleasePhase.RELEASE: + path = get_finished_dir() / path_project / path_version + # Do not add "case _" here + return path diff --git a/atr/post/upload.py b/atr/post/upload.py index bee8a30e..7b4f9f50 100644 --- a/atr/post/upload.py +++ b/atr/post/upload.py @@ -32,6 +32,7 @@ import atr.form as form import atr.get as get import atr.log as log import atr.models.sql as sql +import atr.paths as paths import atr.shared as shared import atr.storage as storage import atr.storage.types as types @@ -48,7 +49,7 @@ async def finalise( await session.check_access(project_name) try: - staging_dir = util.get_upload_staging_dir(upload_session) + staging_dir = paths.get_upload_staging_dir(upload_session) except ValueError: return _json_error("Invalid session token", 400) @@ -123,7 +124,7 @@ async def stage( await session.check_access(project_name) try: - staging_dir = util.get_upload_staging_dir(upload_session) + staging_dir = paths.get_upload_staging_dir(upload_session) except ValueError: return _json_error("Invalid session token", 400) diff --git a/atr/server.py b/atr/server.py index 3119098f..6dcee179 100644 --- a/atr/server.py +++ b/atr/server.py @@ -56,6 +56,7 @@ import atr.filters as filters import atr.log as log import atr.manager as manager import atr.models.sql as sql +import atr.paths as paths import atr.preload as preload import atr.ssh as ssh import atr.svn.pubsub as pubsub @@ -172,13 +173,13 @@ def _app_dirs_setup(state_dir_str: str, hot_reload: bool) -> None: pathlib.Path(state_dir_str) / "runtime", pathlib.Path(state_dir_str) / "secrets" / "curated", pathlib.Path(state_dir_str) / "secrets" / "generated", - util.get_downloads_dir(), - util.get_finished_dir(), - util.get_quarantined_dir(), - util.get_tmp_dir(), - util.get_unfinished_dir(), + paths.get_downloads_dir(), + paths.get_finished_dir(), + paths.get_quarantined_dir(), + paths.get_tmp_dir(), + paths.get_unfinished_dir(), ] - unfinished_dir = util.get_unfinished_dir() + unfinished_dir = paths.get_unfinished_dir() for directory in directories_to_ensure: directory.mkdir(parents=True, exist_ok=True) if directory != unfinished_dir: @@ -927,7 +928,7 @@ async def _reset_request_log_context(): def _set_file_permissions_to_read_only() -> None: """Set permissions of all files in the unfinished and finished directories to read only.""" # TODO: After a migration period, incorrect permissions should be an error - directories = [util.get_unfinished_dir(), util.get_finished_dir()] + directories = [paths.get_unfinished_dir(), paths.get_finished_dir()] fixed_count = 0 for directory in directories: if not directory.exists(): diff --git a/atr/shared/web.py b/atr/shared/web.py index 03ab044f..12a3a411 100644 --- a/atr/shared/web.py +++ b/atr/shared/web.py @@ -25,6 +25,7 @@ import atr.get as get import atr.htm as htm import atr.models.results as results import atr.models.sql as sql +import atr.paths as paths import atr.post as post import atr.shared.draft as draft import atr.storage as storage @@ -48,16 +49,16 @@ async def check( can_vote: bool = False, can_resolve: bool = False, ) -> web.WerkzeugResponse | str: - base_path = util.release_directory(release) + base_path = paths.release_directory(release) # TODO: This takes 180ms for providers # We could cache it - paths = [path async for path in util.paths_recursive(base_path)] - paths.sort() + all_paths = [path async for path in util.paths_recursive(base_path)] + all_paths.sort() async with storage.read(session) as read: ragp = read.as_general_public() - info = await ragp.releases.path_info(release, paths) + info = await ragp.releases.path_info(release, all_paths) user_ssh_keys: Sequence[sql.SSHKey] = [] asf_id: str | None = None @@ -95,7 +96,7 @@ async def check( ) delete_file_forms: dict[str, htm.Element] = {} - for path in paths: + for path in all_paths: delete_file_forms[str(path)] = form.render( model_cls=draft.DeleteFileForm, action=util.as_url(post.draft.delete_file, project_name=release.project.name, version_name=release.version), @@ -130,7 +131,7 @@ async def check( vote_task_warnings = _warnings_from_vote_result(vote_task) has_files = await util.has_files(release) - has_any_errors = any(info.errors.get(path, []) for path in paths) if info else False + has_any_errors = any(info.errors.get(path, []) for path in all_paths) if info else False strict_checking = release.project.policy_strict_checking strict_checking_errors = strict_checking and has_any_errors blocker_errors = False @@ -146,7 +147,7 @@ async def check( project_name=release.project.name, version_name=release.version, release=release, - paths=paths, + paths=all_paths, info=info, revision_editor=revision_editor, revision_time=revision_timestamp, diff --git a/atr/ssh.py b/atr/ssh.py index 6f9d1433..567a4085 100644 --- a/atr/ssh.py +++ b/atr/ssh.py @@ -41,6 +41,7 @@ import atr.config as config import atr.db as db import atr.log as log import atr.models.sql as sql +import atr.paths as paths import atr.storage as storage import atr.storage.types as types import atr.user as user @@ -521,7 +522,7 @@ async def _step_07a_process_validated_rsync_read( exit_status = 1 try: # Determine the source directory based on the release phase and revision - source_dir = util.release_directory(release) + source_dir = paths.release_directory(release) log.info( f"Identified source directory for read: {source_dir} for release " f"{release.name} (phase {release.phase.value})" diff --git a/atr/storage/readers/releases.py b/atr/storage/readers/releases.py index 18a48a56..77d330fb 100644 --- a/atr/storage/readers/releases.py +++ b/atr/storage/readers/releases.py @@ -25,6 +25,7 @@ import atr.classify as classify import atr.db as db import atr.db.interaction as interaction import atr.models.sql as sql +import atr.paths as paths import atr.storage as storage import atr.storage.types as types import atr.util as util @@ -54,20 +55,20 @@ class GeneralPublic: self.__data = data self.__asf_uid = read.authorisation.asf_uid - async def path_info(self, release: sql.Release, paths: list[pathlib.Path]) -> types.PathInfo | None: + async def path_info(self, release: sql.Release, all_paths: list[pathlib.Path]) -> types.PathInfo | None: info = types.PathInfo() latest_revision_number = release.latest_revision_number if latest_revision_number is None: return None await self.__successes_errors_warnings(release, latest_revision_number, info) - base_path = util.release_directory(release) + base_path = paths.release_directory(release) source_matcher = None source_artifact_paths = release.project.policy_source_artifact_paths if source_artifact_paths: source_matcher = util.create_path_matcher(source_artifact_paths, None, base_path) - for path in paths: + for path in all_paths: info.file_types[path] = classify.classify(path, base_path=base_path, source_matcher=source_matcher) - self.__compute_checker_stats(info, paths) + self.__compute_checker_stats(info, all_paths) return info def __accumulate_results( diff --git a/atr/storage/writers/announce.py b/atr/storage/writers/announce.py index 5db5a945..aa9b0a88 100644 --- a/atr/storage/writers/announce.py +++ b/atr/storage/writers/announce.py @@ -30,6 +30,7 @@ import sqlmodel import atr.construct as construct import atr.db as db import atr.models.sql as sql +import atr.paths as paths import atr.storage as storage import atr.tasks.message as message import atr.util as util @@ -170,12 +171,12 @@ class CommitteeMember(CommitteeParticipant): subject, _ = await construct.announce_release_subject_and_body(subject_template, "", options) # Prepare paths for file operations - unfinished_revisions_path = util.release_directory_base(release) + unfinished_revisions_path = paths.release_directory_base(release) unfinished_path = unfinished_revisions_path / release.unwrap_revision_number unfinished_dir = str(unfinished_path) release_date = datetime.datetime.now(datetime.UTC) predicted_finished_release = self.__predicted_finished_release(release, release_date) - finished_path = util.release_directory(predicted_finished_release) + finished_path = paths.release_directory(predicted_finished_release) finished_dir = str(finished_path) if await aiofiles.os.path.exists(finished_dir): raise storage.AccessError("Release already exists") @@ -258,7 +259,7 @@ class CommitteeMember(CommitteeParticipant): ) -> None: """Hard link the release files to the downloads directory.""" # TODO: Rename *_dir functions to _path functions - downloads_base_path = util.get_downloads_dir() + downloads_base_path = paths.get_downloads_dir() downloads_path = downloads_base_path / committee.name / download_path_suffix.removeprefix("/") # The "exist_ok" parameter means to overwrite files if True # We only overwrite if we're not preserving, so we supply "not preserve" diff --git a/atr/storage/writers/keys.py b/atr/storage/writers/keys.py index f2cbe783..4b3cff77 100644 --- a/atr/storage/writers/keys.py +++ b/atr/storage/writers/keys.py @@ -41,6 +41,7 @@ import atr.config as config import atr.db as db import atr.log as log import atr.models.sql as sql +import atr.paths as paths import atr.storage as storage import atr.storage.outcome as outcome import atr.storage.types as types @@ -416,7 +417,7 @@ class CommitteeParticipant(FoundationCommitter): self, ) -> outcome.Outcome[str]: try: - base_downloads_dir = util.get_downloads_dir() + base_downloads_dir = paths.get_downloads_dir() committee = await self.committee() is_podling = committee.is_podling @@ -470,7 +471,7 @@ class CommitteeParticipant(FoundationCommitter): version=version_name, _committee=True, ).demand(storage.AccessError(f"Release not found: {project_name} {version_name}")) - keys_path = util.release_directory(release) / "KEYS" + keys_path = paths.release_directory(release) / "KEYS" async with aiofiles.open(keys_path, encoding="utf-8") as f: keys_file_text = await f.read() if release.committee is None: diff --git a/atr/storage/writers/policy.py b/atr/storage/writers/policy.py index 8c376388..a6ff2760 100644 --- a/atr/storage/writers/policy.py +++ b/atr/storage/writers/policy.py @@ -24,9 +24,9 @@ import strictyaml import strictyaml.ruamel.error as error import atr.db as db +import atr.hashes as hashes import atr.models as models import atr.storage as storage -import atr.util as util if TYPE_CHECKING: import atr.shared as shared @@ -190,8 +190,8 @@ class CommitteeMember(CommitteeParticipant): ) -> None: submitted_subject = submitted_subject.strip() current_default_text = project.policy_announce_release_subject_default - current_default_hash = util.compute_sha3_256(current_default_text.encode()) - submitted_hash = util.compute_sha3_256(submitted_subject.encode()) + current_default_hash = hashes.compute_sha3_256(current_default_text.encode()) + submitted_hash = hashes.compute_sha3_256(submitted_subject.encode()) if submitted_hash == current_default_hash: release_policy.announce_release_subject = "" @@ -206,8 +206,8 @@ class CommitteeMember(CommitteeParticipant): ) -> None: submitted_template = submitted_template.replace("\r\n", "\n") current_default_text = project.policy_announce_release_default - current_default_hash = util.compute_sha3_256(current_default_text.encode()) - submitted_hash = util.compute_sha3_256(submitted_template.encode()) + current_default_hash = hashes.compute_sha3_256(current_default_text.encode()) + submitted_hash = hashes.compute_sha3_256(submitted_template.encode()) if submitted_hash == current_default_hash: release_policy.announce_release_template = "" @@ -235,8 +235,8 @@ class CommitteeMember(CommitteeParticipant): ) -> None: submitted_subject = submitted_subject.strip() current_default_text = project.policy_start_vote_subject_default - current_default_hash = util.compute_sha3_256(current_default_text.encode()) - submitted_hash = util.compute_sha3_256(submitted_subject.encode()) + current_default_hash = hashes.compute_sha3_256(current_default_text.encode()) + submitted_hash = hashes.compute_sha3_256(submitted_subject.encode()) if submitted_hash == current_default_hash: release_policy.start_vote_subject = "" @@ -251,8 +251,8 @@ class CommitteeMember(CommitteeParticipant): ) -> None: submitted_template = submitted_template.replace("\r\n", "\n") current_default_text = project.policy_start_vote_default - current_default_hash = util.compute_sha3_256(current_default_text.encode()) - submitted_hash = util.compute_sha3_256(submitted_template.encode()) + current_default_hash = hashes.compute_sha3_256(current_default_text.encode()) + submitted_hash = hashes.compute_sha3_256(submitted_template.encode()) if submitted_hash == current_default_hash: release_policy.start_vote_template = "" diff --git a/atr/storage/writers/release.py b/atr/storage/writers/release.py index d05b80d4..411bd642 100644 --- a/atr/storage/writers/release.py +++ b/atr/storage/writers/release.py @@ -36,6 +36,7 @@ import atr.form as form import atr.log as log import atr.models.api as api import atr.models.sql as sql +import atr.paths as paths import atr.storage as storage import atr.storage.types as types import atr.util as util @@ -104,8 +105,8 @@ class CommitteeParticipant(FoundationCommitter): project_name=project_name, version=version, phase=phase, _committee=True ).demand(storage.AccessError(f"Release '{project_name} {version}' not found.")) release_dirs = [ - util.release_directory_base(release), - util.get_attestable_dir() / project_name / version, + paths.release_directory_base(release), + paths.get_attestable_dir() / project_name / version, ] # Delete from the database using bulk SQL DELETE for efficiency @@ -527,7 +528,7 @@ class CommitteeParticipant(FoundationCommitter): async def __delete_release_data_downloads(self, release: sql.Release) -> None: # Delete hard links from the downloads directory - finished_dir = util.release_directory(release) + finished_dir = paths.release_directory(release) if await aiofiles.os.path.isdir(finished_dir): release_inodes = set() async for file_path in util.paths_recursive(finished_dir): @@ -538,7 +539,7 @@ class CommitteeParticipant(FoundationCommitter): continue if release_inodes: - downloads_dir = util.get_downloads_dir() + downloads_dir = paths.get_downloads_dir() async for link_path in util.paths_recursive(downloads_dir): full_link_path = downloads_dir / link_path try: diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py index 16d1f00b..9e7bbbd0 100644 --- a/atr/storage/writers/revision.py +++ b/atr/storage/writers/revision.py @@ -36,6 +36,7 @@ import atr.db.interaction as interaction import atr.detection as detection import atr.merge as merge import atr.models.sql as sql +import atr.paths as paths import atr.storage as storage import atr.storage.types as types import atr.tasks as tasks @@ -136,16 +137,16 @@ class CommitteeParticipant(FoundationCommitter): release.check_cache_key = None if clone_from is not None: - old_release_dir = util.release_directory_base(release) / clone_from + old_release_dir = paths.release_directory_base(release) / clone_from else: - old_release_dir = util.release_directory(release) + old_release_dir = paths.release_directory(release) merge_enabled = clone_from is None # Create a temporary directory # We ensure, below, that it's removed on any exception # Use the tmp subdirectory of state, to ensure that it is on the same filesystem prefix_token = secrets.token_hex(16) - temp_dir: str = await asyncio.to_thread(tempfile.mkdtemp, prefix=prefix_token + "-", dir=util.get_tmp_dir()) + temp_dir: str = await asyncio.to_thread(tempfile.mkdtemp, prefix=prefix_token + "-", dir=paths.get_tmp_dir()) temp_dir_path = pathlib.Path(temp_dir) try: @@ -233,7 +234,7 @@ class CommitteeParticipant(FoundationCommitter): and (prior_name != old_revision.name) ): prior_number = prior_name.split()[-1] - prior_dir = util.release_directory_base(release) / prior_number + prior_dir = paths.release_directory_base(release) / prior_number await merge.merge( base_inodes, base_hashes, @@ -250,7 +251,7 @@ class CommitteeParticipant(FoundationCommitter): # Rename the directory to the new revision number await data.refresh(release) - new_revision_dir = util.release_directory(release) + new_revision_dir = paths.release_directory(release) # Ensure that the parent directory exists await aiofiles.os.makedirs(new_revision_dir.parent, exist_ok=True) diff --git a/atr/tasks/__init__.py b/atr/tasks/__init__.py index 282db9cf..6c4a47f0 100644 --- a/atr/tasks/__init__.py +++ b/atr/tasks/__init__.py @@ -30,6 +30,7 @@ import atr.db as db import atr.hashes as hashes import atr.models.results as results import atr.models.sql as sql +import atr.paths as file_paths import atr.tasks.checks as checks import atr.tasks.checks.compare as compare import atr.tasks.checks.hashing as hashing @@ -134,7 +135,7 @@ async def draft_checks( """Core logic to analyse a draft revision and queue checks.""" # Construct path to the specific revision # We don't have the release object here, so we can't use util.release_directory - revision_path = util.get_unfinished_dir() / project_name / release_version / revision_number + revision_path = file_paths.get_unfinished_dir() / project_name / release_version / revision_number relative_paths = [path async for path in util.paths_recursive(revision_path)] async with db.ensure_session(caller_data) as data: diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py index 4bdcf40b..1dd9b0dc 100644 --- a/atr/tasks/checks/__init__.py +++ b/atr/tasks/checks/__init__.py @@ -36,11 +36,11 @@ if TYPE_CHECKING: import atr.attestable as attestable import atr.db as db -import atr.file_paths as file_paths import atr.hashes as hashes import atr.log as log import atr.models.github as github_models import atr.models.sql as sql +import atr.paths as file_paths import atr.util as util diff --git a/atr/tasks/checks/compare.py b/atr/tasks/checks/compare.py index 72905a2d..013f7839 100644 --- a/atr/tasks/checks/compare.py +++ b/atr/tasks/checks/compare.py @@ -42,6 +42,7 @@ import atr.config as config import atr.log as log import atr.models.github as github_models import atr.models.results as results +import atr.paths as paths import atr.tasks.checks as checks import atr.util as util @@ -102,7 +103,7 @@ async def source_trees(args: checks.FunctionArguments) -> results.Results | None return None max_extract_size = args.extra_args.get("max_extract_size", _CONFIG.MAX_EXTRACT_SIZE) chunk_size = args.extra_args.get("chunk_size", _CONFIG.EXTRACT_CHUNK_SIZE) - tmp_dir = util.get_tmp_dir() + tmp_dir = paths.get_tmp_dir() await aiofiles.os.makedirs(tmp_dir, exist_ok=True) async with util.async_temporary_directory(prefix="trees-", dir=tmp_dir) as temp_dir: github_dir = temp_dir / "github" diff --git a/atr/tasks/sbom.py b/atr/tasks/sbom.py index 7c60d88d..5a6539f0 100644 --- a/atr/tasks/sbom.py +++ b/atr/tasks/sbom.py @@ -30,6 +30,7 @@ import atr.log as log import atr.models.results as results import atr.models.schema as schema import atr.models.sql as sql +import atr.paths as paths import atr.sbom as sbom import atr.storage as storage import atr.tasks.checks as checks @@ -81,7 +82,7 @@ class ScoreArgs(FileArgs): @checks.with_model(FileArgs) async def augment(args: FileArgs) -> results.Results | None: - base_dir = util.get_unfinished_dir() / args.project_name / args.version_name / args.revision_number + base_dir = paths.get_unfinished_dir() / args.project_name / args.version_name / args.revision_number if not await aiofiles.os.path.isdir(base_dir): raise SBOMScoringError("Revision directory does not exist", {"base_dir": str(base_dir)}) full_path = base_dir / args.file_path @@ -141,7 +142,7 @@ async def generate_cyclonedx(args: GenerateCycloneDX) -> results.Results | None: @checks.with_model(FileArgs) async def osv_scan(args: FileArgs) -> results.Results | None: - base_dir = util.get_unfinished_dir() / args.project_name / args.version_name / args.revision_number + base_dir = paths.get_unfinished_dir() / args.project_name / args.version_name / args.revision_number if not await aiofiles.os.path.isdir(base_dir): raise SBOMScanningError("Revision directory does not exist", {"base_dir": str(base_dir)}) full_path = base_dir / args.file_path @@ -198,7 +199,7 @@ async def osv_scan(args: FileArgs) -> results.Results | None: @checks.with_model(FileArgs) async def score_qs(args: FileArgs) -> results.Results | None: - base_dir = util.get_unfinished_dir() / args.project_name / args.version_name / args.revision_number + base_dir = paths.get_unfinished_dir() / args.project_name / args.version_name / args.revision_number if not await aiofiles.os.path.isdir(base_dir): raise SBOMScoringError("Revision directory does not exist", {"base_dir": str(base_dir)}) full_path = base_dir / args.file_path @@ -234,10 +235,10 @@ async def score_qs(args: FileArgs) -> results.Results | None: @checks.with_model(ScoreArgs) async def score_tool(args: ScoreArgs) -> results.Results | None: - base_dir = util.get_unfinished_dir() / args.project_name / args.version_name / args.revision_number + base_dir = paths.get_unfinished_dir() / args.project_name / args.version_name / args.revision_number previous_base_dir = None if args.previous_release_version is not None: - previous_base_dir = util.get_finished_dir() / args.project_name / args.previous_release_version + previous_base_dir = paths.get_finished_dir() / args.project_name / args.previous_release_version if not await aiofiles.os.path.isdir(base_dir): raise SBOMScoringError("Revision directory does not exist", {"base_dir": str(base_dir)}) full_path = base_dir / args.file_path diff --git a/atr/util.py b/atr/util.py index e412f74c..6e55f577 100644 --- a/atr/util.py +++ b/atr/util.py @@ -58,6 +58,7 @@ import atr.models.validation as validation import atr.registry as registry import atr.tarzip as tarzip import atr.user as user +from atr.paths import release_directory, release_directory_revision ARCHIVE_ROOT_SUFFIXES: Final[tuple[str, ...]] = ("-source", "-src") DIRECTORY_PERMISSIONS: Final[int] = 0o755 @@ -246,20 +247,6 @@ def committee_is_standing(committee_name: str) -> bool: return committee_name in registry.STANDING_COMMITTEES -def compute_sha3_256(file_data: bytes) -> str: - """Compute SHA3-256 hash of file data.""" - return hashlib.sha3_256(file_data).hexdigest() - - -async def compute_sha512(file_path: pathlib.Path) -> str: - """Compute SHA-512 hash of a file.""" - sha512 = hashlib.sha512() - async with aiofiles.open(file_path, "rb") as f: - while chunk := await f.read(4096): - sha512.update(chunk) - return sha512.hexdigest() - - async def content_list( phase_subdir: pathlib.Path, project_name: str, version_name: str, revision_name: str | None = None ) -> AsyncGenerator[FileStat]: @@ -449,15 +436,6 @@ async def email_to_uid_map() -> dict[str, str]: return email_to_uid -async def file_sha3(path: str) -> str: - """Compute SHA3-256 hash of a file.""" - sha3 = hashlib.sha3_256() - async with aiofiles.open(path, "rb") as f: - while chunk := await f.read(4096): - sha3.update(chunk) - return sha3.hexdigest() - - def format_datetime(dt_obj: datetime.datetime | int) -> str: """Format a datetime object or Unix timestamp into a human readable datetime string.""" # Integers are unix timestamps @@ -542,22 +520,6 @@ async def get_asf_id_or_die() -> str: return web_session.uid -def get_attestable_dir() -> pathlib.Path: - return pathlib.Path(config.get().ATTESTABLE_STORAGE_DIR) - - -def get_downloads_dir() -> pathlib.Path: - return pathlib.Path(config.get().DOWNLOADS_STORAGE_DIR) - - -def get_finished_dir() -> pathlib.Path: - return pathlib.Path(config.get().FINISHED_STORAGE_DIR) - - -def get_quarantined_dir() -> pathlib.Path: - return pathlib.Path(config.get().STATE_DIR) / "quarantined" - - async def get_release_stats(release: sql.Release) -> tuple[int, int, str]: """Calculate file count, total byte size, and formatted size for a release.""" base_dir = release_directory(release) @@ -580,21 +542,6 @@ async def get_release_stats(release: sql.Release) -> tuple[int, int, str]: return count, total_bytes, formatted_size -def get_tmp_dir() -> pathlib.Path: - # This must be on the same filesystem as the other state subdirectories - return pathlib.Path(config.get().STATE_DIR) / "temporary" - - -def get_unfinished_dir() -> pathlib.Path: - return pathlib.Path(config.get().UNFINISHED_STORAGE_DIR) - - -def get_upload_staging_dir(session_token: str) -> pathlib.Path: - if not session_token.isalnum(): - raise ValueError("Invalid session token") - return get_tmp_dir() / "upload-staging" / session_token - - async def get_urls_as_completed(urls: Sequence[str]) -> AsyncGenerator[tuple[str, int | str | None, bytes]]: """GET a list of URLs in parallel and yield (url, status, content_bytes) as they become available.""" async with create_secure_session() as session: @@ -893,13 +840,6 @@ def plural(count: int, singular: str, plural_form: str | None = None, *, include return word -def quarantine_directory(quarantined: sql.Quarantined) -> pathlib.Path: - if not quarantined.token.isalnum(): - raise ValueError("Invalid quarantine token") - release = quarantined.release - return get_quarantined_dir() / release.project_name / release.version / quarantined.token - - async def read_file_for_viewer(full_path: pathlib.Path, max_size: int) -> tuple[str | None, bool, bool, str | None]: """Read file content for viewer.""" content: str | None = None @@ -943,70 +883,6 @@ async def read_file_for_viewer(full_path: pathlib.Path, max_size: int) -> tuple[ return content, is_text, is_truncated, error_message -def release_directory(release: sql.Release) -> pathlib.Path: - """Return the absolute path to the directory containing the active files for a given release phase.""" - latest_revision_number = release.latest_revision_number - if (release.phase == sql.ReleasePhase.RELEASE) or (latest_revision_number is None): - return release_directory_base(release) - return release_directory_base(release) / latest_revision_number - - -def release_directory_base(release: sql.Release) -> pathlib.Path: - """Determine the filesystem directory for a given release based on its phase.""" - phase = release.phase - project_name = release.project.name - version_name = release.version - - base_dir: pathlib.Path | None = None - match phase: - case sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT: - base_dir = get_unfinished_dir() - case sql.ReleasePhase.RELEASE_CANDIDATE: - base_dir = get_unfinished_dir() - case sql.ReleasePhase.RELEASE_PREVIEW: - base_dir = get_unfinished_dir() - case sql.ReleasePhase.RELEASE: - base_dir = get_finished_dir() - # Do not add "case _" here - return base_dir / project_name / version_name - - -def release_directory_revision(release: sql.Release) -> pathlib.Path | None: - """Return the path to the directory containing the active files for a given release phase.""" - path_project = release.project.name - path_version = release.version - match release.phase: - case ( - sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT - | sql.ReleasePhase.RELEASE_CANDIDATE - | sql.ReleasePhase.RELEASE_PREVIEW - ): - if (path_revision := release.latest_revision_number) is None: - return None - path = get_unfinished_dir() / path_project / path_version / path_revision - case sql.ReleasePhase.RELEASE: - path = get_finished_dir() / path_project / path_version - # Do not add "case _" here - return path - - -def release_directory_version(release: sql.Release) -> pathlib.Path: - """Return the path to the directory containing the active files for a given release phase.""" - path_project = release.project.name - path_version = release.version - match release.phase: - case ( - sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT - | sql.ReleasePhase.RELEASE_CANDIDATE - | sql.ReleasePhase.RELEASE_PREVIEW - ): - path = get_unfinished_dir() / path_project / path_version - case sql.ReleasePhase.RELEASE: - path = get_finished_dir() / path_project / path_version - # Do not add "case _" here - return path - - async def session_cache_read() -> dict[str, dict]: cache_path = pathlib.Path(config.get().STATE_DIR) / "cache" / "user_session_cache.json" try: diff --git a/atr/validate.py b/atr/validate.py index 760dfc56..7688f823 100644 --- a/atr/validate.py +++ b/atr/validate.py @@ -23,7 +23,7 @@ from typing import NamedTuple, TypeVar import atr.db as db import atr.models.sql as sql -import atr.util as util +import atr.paths as paths class Divergence(NamedTuple): @@ -330,7 +330,7 @@ def release_name(r: sql.Release) -> Divergences: @release_components("Release") def release_on_disk(r: sql.Release) -> Divergences: """Check that the release is on disk.""" - path = util.release_directory(r) + path = paths.release_directory(r) def okay(p: pathlib.Path) -> bool: # The release directory must exist and contain at least one entry diff --git a/tests/unit/test_checks_compare.py b/tests/unit/test_checks_compare.py index db9a5f48..7cb58981 100644 --- a/tests/unit/test_checks_compare.py +++ b/tests/unit/test_checks_compare.py @@ -684,7 +684,7 @@ async def test_source_trees_creates_temp_workspace_and_cleans_up( monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare) - monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", ReturnValue(tmp_root)) + monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) await atr.tasks.checks.compare.source_trees(args) @@ -721,7 +721,7 @@ async def test_source_trees_payload_none_skips_temp_workspace(monkeypatch: pytes "_decompress_archive", RaiseAsync("_decompress_archive should not be called"), ) - monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", RaiseSync("get_tmp_dir should not be called")) + monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", RaiseSync("get_tmp_dir should not be called")) await atr.tasks.checks.compare.source_trees(args) @@ -754,7 +754,7 @@ async def test_source_trees_permits_pkg_info_when_pyproject_toml_exists( monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress_with_pyproject) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare) - monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", ReturnValue(tmp_root)) + monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) await atr.tasks.checks.compare.source_trees(args) @@ -780,7 +780,7 @@ async def test_source_trees_records_failure_when_archive_has_invalid_files( monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare) - monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", ReturnValue(tmp_root)) + monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) await atr.tasks.checks.compare.source_trees(args) @@ -809,7 +809,7 @@ async def test_source_trees_records_failure_when_archive_root_not_found( monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", checkout) monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) - monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", ReturnValue(tmp_root)) + monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) await atr.tasks.checks.compare.source_trees(args) @@ -833,7 +833,7 @@ async def test_source_trees_records_failure_when_decompress_fails( monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", PayloadLoader(payload)) monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", checkout) monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) - monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", ReturnValue(tmp_root)) + monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) await atr.tasks.checks.compare.source_trees(args) @@ -861,7 +861,7 @@ async def test_source_trees_records_failure_when_extra_entries_in_archive( monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", checkout) monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) - monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", ReturnValue(tmp_root)) + monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) await atr.tasks.checks.compare.source_trees(args) @@ -892,7 +892,7 @@ async def test_source_trees_reports_repo_only_sample_limited_to_five( monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare) - monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", ReturnValue(tmp_root)) + monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) await atr.tasks.checks.compare.source_trees(args) diff --git a/tests/unit/test_create_revision.py b/tests/unit/test_create_revision.py index 630919be..635dcdee 100644 --- a/tests/unit/test_create_revision.py +++ b/tests/unit/test_create_revision.py @@ -134,10 +134,10 @@ async def test_clone_from_older_revision_skips_merge_without_intervening_change( mock.patch.object( revision.util, "create_hard_link_clone", new_callable=mock.AsyncMock ) as create_hard_link_clone_mock, - mock.patch.object(revision.util, "get_tmp_dir", return_value=tmp_path), + mock.patch.object(revision.paths, "get_tmp_dir", return_value=tmp_path), mock.patch.object(revision.util, "paths_to_inodes", return_value={}) as paths_to_inodes_mock, - mock.patch.object(revision.util, "release_directory", return_value=tmp_path / "releases" / "00006"), - mock.patch.object(revision.util, "release_directory_base", return_value=tmp_path / "releases"), + mock.patch.object(revision.paths, "release_directory", return_value=tmp_path / "releases" / "00006"), + mock.patch.object(revision.paths, "release_directory_base", return_value=tmp_path / "releases"), ): await participant.create_revision("proj", "1.0", "test", clone_from="00002") @@ -187,7 +187,7 @@ async def test_modify_failed_error_propagates_and_cleans_up(tmp_path: pathlib.Pa with ( mock.patch.object(revision.db, "session", return_value=mock_session), mock.patch.object(revision.interaction, "latest_revision", new_callable=mock.AsyncMock, return_value=None), - mock.patch.object(revision.util, "get_tmp_dir", return_value=tmp_path), + mock.patch.object(revision.paths, "get_tmp_dir", return_value=tmp_path), ): with pytest.raises(types.FailedError, match="Intentional error"): await participant.create_revision("proj", "1.0", "test", modify=modify) diff --git a/tests/unit/test_paths.py b/tests/unit/test_paths.py new file mode 100644 index 00000000..0c36c46b --- /dev/null +++ b/tests/unit/test_paths.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pathlib +import types + +import pytest + +import atr.paths as paths + + +def test_get_quarantined_dir_uses_state_dir(monkeypatch, tmp_path: pathlib.Path): + mock_config = types.SimpleNamespace(STATE_DIR=str(tmp_path)) + monkeypatch.setattr("atr.config.get", lambda: mock_config) + assert paths.get_quarantined_dir() == tmp_path / "quarantined" + + +def test_quarantine_directory_builds_deterministic_path(monkeypatch, tmp_path: pathlib.Path): + mock_config = types.SimpleNamespace(STATE_DIR=str(tmp_path)) + monkeypatch.setattr("atr.config.get", lambda: mock_config) + mock_release = types.SimpleNamespace(project_name="example", version="1.2.3") + quarantined = types.SimpleNamespace(release=mock_release, token="0123456789abcdef") + assert ( + paths.quarantine_directory(quarantined) == tmp_path / "quarantined" / "example" / "1.2.3" / "0123456789abcdef" + ) + + +def test_quarantine_directory_rejects_non_alnum_token(): + quarantined = types.SimpleNamespace(token="../escape") + with pytest.raises(ValueError, match="Invalid quarantine token"): + paths.quarantine_directory(quarantined) diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py index aec4aaac..20ff6505 100644 --- a/tests/unit/test_util.py +++ b/tests/unit/test_util.py @@ -19,9 +19,6 @@ import json import os import pathlib import stat -import types - -import pytest import atr.util as util @@ -95,12 +92,6 @@ def test_chmod_files_sets_default_permissions(tmp_path: pathlib.Path): assert file_mode == 0o444 -def test_get_quarantined_dir_uses_state_dir(monkeypatch, tmp_path: pathlib.Path): - mock_config = types.SimpleNamespace(STATE_DIR=str(tmp_path)) - monkeypatch.setattr("atr.config.get", lambda: mock_config) - assert util.get_quarantined_dir() == tmp_path / "quarantined" - - def test_json_for_script_element_escapes_correctly(): payload = ["example.txt", "</script><script>alert(1)</script>", "apple&banana"] @@ -111,17 +102,3 @@ def test_json_for_script_element_escapes_correctly(): assert "apple&banana" not in serialized assert "apple\\u0026banana" in serialized assert json.loads(serialized) == payload - - -def test_quarantine_directory_builds_deterministic_path(monkeypatch, tmp_path: pathlib.Path): - mock_config = types.SimpleNamespace(STATE_DIR=str(tmp_path)) - monkeypatch.setattr("atr.config.get", lambda: mock_config) - mock_release = types.SimpleNamespace(project_name="example", version="1.2.3") - quarantined = types.SimpleNamespace(release=mock_release, token="0123456789abcdef") - assert util.quarantine_directory(quarantined) == tmp_path / "quarantined" / "example" / "1.2.3" / "0123456789abcdef" - - -def test_quarantine_directory_rejects_non_alnum_token(): - quarantined = types.SimpleNamespace(token="../escape") - with pytest.raises(ValueError, match="Invalid quarantine token"): - util.quarantine_directory(quarantined) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
