This is an automated email from the ASF dual-hosted git repository. arm pushed a commit to branch arm in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
commit 0bcd3a7f5b2c85412e293d152a0217263ce3901b Author: Alastair McFarlane <[email protected]> AuthorDate: Fri Mar 20 13:13:57 2026 +0000 #915 - Add safe path type --- atr/blueprints/common.py | 2 ++ atr/form.py | 55 +++++---------------------------------- atr/get/docs.py | 10 +++----- atr/get/download.py | 20 +++++---------- atr/get/draft.py | 15 ++++------- atr/get/file.py | 8 ++---- atr/get/published.py | 10 +++----- atr/get/ref.py | 19 ++++++-------- atr/get/report.py | 10 +++----- atr/get/sbom.py | 8 ++---- atr/models/api.py | 6 ++--- atr/models/safe.py | 53 ++++++++++++++++++++++++++++++++++++++ atr/post/announce.py | 2 +- atr/post/draft.py | 33 ++++++++++-------------- atr/post/finish.py | 8 +++--- atr/post/sbom.py | 39 ++++++++++++---------------- atr/post/upload.py | 12 +++++---- atr/shared/announce.py | 24 +++-------------- atr/shared/draft.py | 3 ++- atr/shared/finish.py | 14 +++++----- atr/shared/upload.py | 5 ++-- atr/storage/writers/announce.py | 8 +++--- atr/storage/writers/release.py | 57 +++++++++++++++++++---------------------- atr/tasks/gha.py | 3 +-- atr/tasks/svn.py | 3 ++- atr/util.py | 25 ------------------ 26 files changed, 188 insertions(+), 264 deletions(-) diff --git a/atr/blueprints/common.py b/atr/blueprints/common.py index c5e26a32..ace99ff4 100644 --- a/atr/blueprints/common.py +++ b/atr/blueprints/common.py @@ -38,6 +38,7 @@ import atr.web as web QUART_CONVERTERS: Final[dict[Any, str]] = { int: "int", float: "float", + safe.RelPath: "path", unsafe.Path: "path", } @@ -45,6 +46,7 @@ VALIDATED_TYPES: Final[set[Any]] = { safe.Alphanumeric, safe.CommitteeKey, safe.ProjectKey, + safe.RelPath, safe.RevisionNumber, safe.VersionKey, unsafe.UnsafeStr, diff --git a/atr/form.py b/atr/form.py index 444d9888..ff2e99aa 100644 --- a/atr/form.py +++ b/atr/form.py @@ -33,6 +33,7 @@ import quart.datastructures as datastructures import quart_wtf.utils as utils import atr.htm as htm +import atr.models.safe as safe import atr.models.schema as schema import atr.util as util @@ -445,33 +446,14 @@ def to_optional_url(v: Any) -> pydantic.HttpUrl | None: return pydantic.TypeAdapter(pydantic.HttpUrl).validate_python(v) -def to_relpath(v: Any) -> pathlib.Path | None: - """Validate a relative filesystem path.""" - if not v: - return None - - path_str = str(v).strip() - if not path_str: - raise ValueError("Path cannot be empty") - - util.validate_relative_path_str(path_str) - return pathlib.Path(path_str) - - -def to_relpath_list(v: Any) -> list[pathlib.Path]: +def to_safe_path_list(v: Any) -> list[safe.RelPath]: if isinstance(v, list): result = [] for item in v: - validated = to_relpath(item) - if validated is None: - raise ValueError("Path list items cannot be empty") - result.append(validated) + result.append(safe.RelPath(item)) return result if isinstance(v, str): - validated = to_relpath(v) - if validated is None: - raise ValueError("Path cannot be empty") - return [validated] + return [safe.RelPath(v)] raise ValueError(f"Expected a path or list of paths, got {type(v).__name__}") @@ -484,18 +466,6 @@ def to_str_list(v: Any) -> list[str]: raise ValueError(f"Expected a string or list of strings, got {type(v).__name__}") -def to_url_path(v: Any) -> str | None: - """Validate a relative URL style path, e.g. for SVN paths.""" - if not v: - return None - - path_str = str(v).strip() - if not path_str: - raise ValueError("Path cannot be empty") - - return util.validate_relative_path_str(path_str) - - # Validator types come before other functions # We must not use the "type" keyword here, otherwise Pydantic complains @@ -567,16 +537,9 @@ OptionalURL = Annotated[ functional_validators.BeforeValidator(to_optional_url), pydantic.Field(default=None), ] - -RelPath = Annotated[ - pathlib.Path | None, - functional_validators.BeforeValidator(to_relpath), - pydantic.Field(default=None), -] - RelPathList = Annotated[ - list[pathlib.Path], - functional_validators.BeforeValidator(to_relpath_list), + list[safe.RelPath], + functional_validators.BeforeValidator(to_safe_path_list), pydantic.Field(default_factory=list), ] @@ -586,12 +549,6 @@ StrList = Annotated[ pydantic.Field(default_factory=list), ] -URLPath = Annotated[ - str | None, - functional_validators.BeforeValidator(to_url_path), - pydantic.Field(default=None), -] - class Set[EnumType: enum.Enum]: def __iter__(self) -> Iterator[EnumType]: diff --git a/atr/get/docs.py b/atr/get/docs.py index c526dd4e..2814f87b 100644 --- a/atr/get/docs.py +++ b/atr/get/docs.py @@ -26,8 +26,7 @@ import quart import atr.blueprints.get as get import atr.config as config -import atr.form as form -import atr.models.unsafe as unsafe +import atr.models.safe as safe import atr.template as template import atr.web as web @@ -57,11 +56,8 @@ async def index(_session: web.Public, _docs: Literal["docs"], _: Literal[""]) -> @get.typed -async def page(_session: web.Public, _docs: Literal["docs"], path: unsafe.Path) -> str: - validated_page = form.to_relpath(path) - if validated_page is None: - quart.abort(400) - return await _serve_docs_page(str(validated_page)) +async def page(_session: web.Public, _docs: Literal["docs"], path: safe.RelPath) -> str: + return await _serve_docs_page(str(path)) async def _serve_docs_page(page: str) -> str: diff --git a/atr/get/download.py b/atr/get/download.py index 84cc1125..a2105024 100644 --- a/atr/get/download.py +++ b/atr/get/download.py @@ -28,12 +28,10 @@ import zipstream import atr.blueprints.get as get import atr.config as config import atr.db as db -import atr.form as form import atr.htm as htm import atr.mapping as mapping import atr.models.safe as safe import atr.models.sql as sql -import atr.models.unsafe as unsafe import atr.paths as paths import atr.template as template import atr.util as util @@ -85,13 +83,13 @@ async def path( _download_path: Literal["download/path"], project_key: safe.ProjectKey, version_key: safe.VersionKey, - file_path: unsafe.Path, + file_path: safe.RelPath, ) -> web.Response: """ URL: /download/path/<project_key>/<version_key>/<path:file_path> Download a file or list a directory from a release in any phase. """ - return await _download_or_list(project_key, version_key, str(file_path)) + return await _download_or_list(project_key, version_key, file_path) @get.typed @@ -105,7 +103,7 @@ async def path_empty( URL: /download/path/<project_key>/<version_key>/ List files at the root of a release directory for download. """ - return await _download_or_list(project_key, version_key, ".") + return await _download_or_list(project_key, version_key, None) @get.typed @@ -195,17 +193,13 @@ async def zip_selected( return web.ZipResponse(stream_zip(files_to_zip), headers=headers) -async def _download_or_list(project_key: safe.ProjectKey, version_key: safe.VersionKey, file_path: str) -> web.Response: +async def _download_or_list( + project_key: safe.ProjectKey, version_key: safe.VersionKey, file_path: safe.RelPath | None +) -> web.Response: """Download a file or list a directory from a release in any phase.""" import atr.get.root as root - # Validate the path, and allow "." for root directory - if file_path == ".": - validated_path = pathlib.Path(".") - else: - validated_path = form.to_relpath(file_path) - if validated_path is None: - raise base.ASFQuartException("Invalid file path", errorcode=400) + validated_path = file_path.as_path() if file_path is not None else pathlib.Path(".") # We allow downloading files from any phase async with db.session() as data: diff --git a/atr/get/draft.py b/atr/get/draft.py index ad3744b9..538915db 100644 --- a/atr/get/draft.py +++ b/atr/get/draft.py @@ -26,7 +26,6 @@ import asfquart.base as base import atr.blueprints.get as get import atr.form as form import atr.models.safe as safe -import atr.models.unsafe as unsafe import atr.paths as paths import atr.post as post import atr.shared as shared @@ -41,16 +40,14 @@ async def tools( _draft_tools: Literal["draft/tools"], project_key: safe.ProjectKey, version_key: safe.VersionKey, - file_path: unsafe.Path, + file_path: safe.RelPath, ) -> str: """ URL: /draft/tools/<project_key>/<version_key>/<path:file_path> Show the tools for a specific file. """ await session.check_access(project_key) - validated_path = form.to_relpath(str(file_path)) - if validated_path is None: - raise base.ASFQuartException("Invalid file path", errorcode=400) + validated_path = file_path.as_path() release = await session.release(project_key, version_key) full_path = str(paths.release_directory(release) / validated_path) @@ -62,8 +59,6 @@ async def tools( modified = int(await aiofiles.os.path.getmtime(full_path)) file_size = await aiofiles.os.path.getsize(full_path) - validated_path_str = str(validated_path) - file_data = { "filename": validated_path.name, "bytes_size": file_size, @@ -74,7 +69,7 @@ async def tools( post.draft.hashgen, project_key=str(project_key), version_key=str(version_key), - file_path=validated_path_str, + file_path=str(file_path), ) sha512_form = form.render( model_cls=shared.draft.HashGen, @@ -89,7 +84,7 @@ async def tools( post.draft.sbomgen, project_key=str(project_key), version_key=str(version_key), - file_path=validated_path_str, + file_path=str(file_path), ), submit_label="Generate CycloneDX SBOM (.cdx.json)", submit_classes="btn-outline-secondary", @@ -101,7 +96,7 @@ async def tools( asf_id=session.uid, project_key=str(project_key), version_key=str(version_key), - file_path=validated_path_str, + file_path=str(file_path), file_data=file_data, release=release, format_file_size=util.format_file_size, diff --git a/atr/get/file.py b/atr/get/file.py index f081f459..75e3833d 100644 --- a/atr/get/file.py +++ b/atr/get/file.py @@ -18,14 +18,12 @@ from typing import Literal import atr.blueprints.get as get -import atr.form as form import atr.get.compose as compose import atr.get.finish as finish import atr.get.vote as vote import atr.htm as htm import atr.models.safe as safe import atr.models.sql as sql -import atr.models.unsafe as unsafe import atr.paths as paths import atr.render as render import atr.template as template @@ -143,15 +141,13 @@ async def selected_path( _file: Literal["file"], project_key: safe.ProjectKey, version_key: safe.VersionKey, - file_path: unsafe.Path, + file_path: safe.RelPath, ) -> str: """ URL: /file/<project_key>/<version_key>/<path:file_path> View the content of a specific file in a release (any phase). """ - validated_path = form.to_relpath(str(file_path)) - if validated_path is None: - raise web.FlashError("Invalid file path") + validated_path = file_path.as_path() release = await session.release(project_key, version_key, phase=None) _max_view_size = 512 * 1024 diff --git a/atr/get/published.py b/atr/get/published.py index 44cd7815..4c0d7281 100644 --- a/atr/get/published.py +++ b/atr/get/published.py @@ -25,16 +25,15 @@ import quart import atr.blueprints.get as get import atr.config as config -import atr.form as form import atr.htm as htm -import atr.models.unsafe as unsafe +import atr.models.safe as safe import atr.paths as paths import atr.util as util import atr.web as web @get.typed -async def path(session: web.Committer, _published: Literal["published"], file_path: unsafe.Path) -> web.QuartResponse: +async def path(session: web.Committer, _published: Literal["published"], file_path: safe.RelPath) -> web.QuartResponse: """ URL: /published/<path:file_path> View the content of a specific file in the downloads directory. @@ -44,10 +43,7 @@ async def path(session: web.Committer, _published: Literal["published"], file_pa # Therefore this path acts as a way to check the contents of that directory if not config.get().ALLOW_TESTS: return quart.abort(404) - validated_path = form.to_relpath(str(file_path)) - if validated_path is None: - return quart.abort(400) - return await _path(session, str(validated_path)) + return await _path(session, str(file_path)) @get.typed diff --git a/atr/get/ref.py b/atr/get/ref.py index 84b18a76..16fbd34e 100644 --- a/atr/get/ref.py +++ b/atr/get/ref.py @@ -25,8 +25,7 @@ import quart import atr.blueprints.get as get import atr.config as config -import atr.form as form -import atr.models.unsafe as unsafe +import atr.models.safe as safe import atr.web as web # Perhaps GitHub will get around to implementing symbol permalinks: @@ -35,7 +34,7 @@ import atr.web as web @get.typed -async def resolve(_session: web.Public, _ref: Literal["ref"], ref_path: unsafe.Path) -> web.WerkzeugResponse: +async def resolve(_session: web.Public, _ref: Literal["ref"], ref_path: safe.RelPath) -> web.WerkzeugResponse: """ URL: /ref/<ref_path> Resolve a code reference to a GitHub permalink. @@ -45,7 +44,7 @@ async def resolve(_session: web.Public, _ref: Literal["ref"], ref_path: unsafe.P if ":" in path: file_path_str, symbol = path.rsplit(":", 1) - resolved_file, validated_path_str = _validate_and_resolve_path(file_path_str, project_root) + resolved_file, validated_path_str = _validate_and_resolve_path(safe.RelPath(file_path_str), project_root) if (not await aiofiles.os.path.exists(resolved_file)) or (not await aiofiles.os.path.isfile(resolved_file)): quart.abort(404) @@ -58,9 +57,10 @@ async def resolve(_session: web.Public, _ref: Literal["ref"], ref_path: unsafe.P github_url = f"https://github.com/apache/tooling-trusted-releases/blob/main/{validated_path_str}#L{line_number}" return quart.redirect(github_url, code=303) + # TODO: Should these functions exist in RelPath? is_directory = path.endswith("/") path_str = path.rstrip("/") - resolved_path, validated_path_str = _validate_and_resolve_path(path_str, project_root) + resolved_path, validated_path_str = _validate_and_resolve_path(safe.RelPath(path_str), project_root) if not await aiofiles.os.path.exists(resolved_path): quart.abort(404) @@ -100,11 +100,8 @@ async def _resolve_symbol_to_line(file_path: pathlib.Path, symbol: str) -> int | return None -def _validate_and_resolve_path(path_str: str, project_root: pathlib.Path) -> tuple[pathlib.Path, str]: - validated_path = form.to_relpath(path_str) - if validated_path is None: - quart.abort(400) - validated_path_str = str(validated_path) +def _validate_and_resolve_path(rel_path: safe.RelPath, project_root: pathlib.Path) -> tuple[pathlib.Path, str]: + validated_path = rel_path.as_path() file_path = project_root / validated_path @@ -114,4 +111,4 @@ def _validate_and_resolve_path(path_str: str, project_root: pathlib.Path) -> tup except (FileNotFoundError, ValueError): quart.abort(404) - return resolved_path, validated_path_str + return resolved_path, str(validated_path) diff --git a/atr/get/report.py b/atr/get/report.py index dcce9a7f..ada72165 100644 --- a/atr/get/report.py +++ b/atr/get/report.py @@ -22,10 +22,8 @@ import aiofiles.os import asfquart.base as base import atr.blueprints.get as get -import atr.form as form import atr.models.safe as safe import atr.models.sql as sql -import atr.models.unsafe as unsafe import atr.paths as paths import atr.storage as storage import atr.template as template @@ -39,15 +37,13 @@ async def selected_path( _report: Literal["report"], project_key: safe.ProjectKey, version_key: safe.VersionKey, - rel_path: unsafe.Path, + rel_path: safe.RelPath, ) -> str: """ URL: /report/<project_key>/<version_key>/<rel_path> Show the report for a specific file. """ - validated_path = form.to_relpath(rel_path) - if validated_path is None: - raise base.ASFQuartException("Invalid file path", errorcode=400) + validated_path = rel_path.as_path() # If the draft is not found, we try to get the release candidate try: @@ -98,7 +94,7 @@ async def selected_path( "report-selected-path.html", project_key=str(project_key), version_key=str(version_key), - rel_path=str(validated_path), + rel_path=str(rel_path), package=file_data, release=release, primary_results=check_results.primary_results_list, diff --git a/atr/get/sbom.py b/atr/get/sbom.py index 942f8345..df798045 100644 --- a/atr/get/sbom.py +++ b/atr/get/sbom.py @@ -33,7 +33,6 @@ import atr.htm as htm import atr.models.results as results import atr.models.safe as safe import atr.models.sql as sql -import atr.models.unsafe as unsafe import atr.render as render import atr.sbom as sbom import atr.shared as shared @@ -51,7 +50,7 @@ async def report( _sbom_report: Literal["sbom/report"], project_key: safe.ProjectKey, version_key: safe.VersionKey, - file_path: unsafe.Path, + file_path: safe.RelPath, ) -> str: """ URL: /sbom/report/<project_key>/<version_key>/<file_path> @@ -90,10 +89,7 @@ async def report( block.h1["SBOM report"] - validated_path = form.to_relpath(file_path) - if validated_path is None: - raise base.ASFQuartException("Invalid file path", errorcode=400) - validated_path_str = str(validated_path) + validated_path_str = str(file_path) task, augment_tasks, osv_tasks = await _fetch_tasks(validated_path_str, project_key, release, version_key) diff --git a/atr/models/api.py b/atr/models/api.py index feb489bf..c33604c5 100644 --- a/atr/models/api.py +++ b/atr/models/api.py @@ -360,7 +360,7 @@ class PublisherReleaseAnnounceArgs(schema.Strict): revision: safe.RevisionNumber = schema.example("00005") email_to: str = schema.example("[email protected]") body: str = schema.example("The Apache Example team is pleased to announce the release of Example 1.0.0...") - path_suffix: str = schema.example("example/1.0.0") + path_suffix: safe.OptionalRelPath = schema.example("example/1.0.0") class PublisherReleaseAnnounceResults(schema.Strict): @@ -399,7 +399,7 @@ class ReleaseAnnounceArgs(schema.Strict): revision: safe.RevisionNumber = schema.example("00005") email_to: str = schema.example("[email protected]") body: str = schema.example("The Apache Example team is pleased to announce the release of Example 1.0.0...") - path_suffix: str = schema.example("example/1.0.0") + path_suffix: safe.OptionalRelPath = schema.example("example/1.0.0") class ReleaseAnnounceResults(schema.Strict): @@ -468,7 +468,7 @@ class ReleaseRevisionsResults(schema.Strict): class ReleaseUploadArgs(schema.Strict): project: safe.ProjectKey = schema.example("example") version: safe.VersionKey = schema.example("0.0.1") - relpath: str = schema.example("example/0.0.1/example-0.0.1-bin.tar.gz") + relpath: safe.RelPath = schema.example("example/0.0.1/example-0.0.1-bin.tar.gz") content: str = schema.example("This is the content of the file.") diff --git a/atr/models/safe.py b/atr/models/safe.py index a69750dd..61ba28f5 100644 --- a/atr/models/safe.py +++ b/atr/models/safe.py @@ -17,6 +17,7 @@ from __future__ import annotations +import pathlib import string import unicodedata from typing import Annotated, Any, Final @@ -25,6 +26,7 @@ import pydantic _ALPHANUM: Final = frozenset(string.ascii_letters + string.digits + "-") _NUMERIC: Final = frozenset(string.digits) +_PATH_CHARS: Final = frozenset(string.ascii_letters + string.digits + "-._+~/()") _VERSION_CHARS: Final = _ALPHANUM | frozenset(".+") @@ -114,6 +116,42 @@ class ReleaseKey(Alphanumeric): return _VERSION_CHARS +class RelPath(SafeType): + """A relative file path that has been validated for safety.""" + + @classmethod + def _valid_chars(cls) -> frozenset[str]: + return _PATH_CHARS + + def _additional_validations(self, value: str) -> None: + posix_path = pathlib.PurePosixPath(value) + windows_path = pathlib.PureWindowsPath(value) + if posix_path.is_absolute() or windows_path.is_absolute(): + raise ValueError("Absolute paths are not allowed") + if "//" in value: + raise ValueError("Path cannot contain empty segments") + for segment in pathlib.Path(value).parts: + if segment in (".", ".."): + raise ValueError("Path cannot contain directory traversal") + if segment in (".git", ".svn"): + raise ValueError("Path cannot contain SCM directories") + if segment.startswith(".") and not segment.startswith(".atr") and segment != ".gitkeep": + raise ValueError("Path cannot contain dotfiles") + + def as_path(self) -> pathlib.Path: + """Return the validated path as a pathlib.Path.""" + return pathlib.Path(self._value) + + def append(self, path: str | pathlib.Path) -> RelPath: + return RelPath(f"{self!s}/{path!s}") + + def prepend(self, path: str | pathlib.Path) -> RelPath: + return RelPath(f"{path!s}/{self!s}") + + def removeprefix(self, prefix: str): + return RelPath(self._value.removeprefix(prefix)) + + class RevisionNumber(Numeric): """A revision number that has been validated for safety.""" @@ -138,6 +176,21 @@ def _empty_to_none(v: object) -> object: return v +def _strip_slashes_or_none(v: object) -> object: + """Strip leading/trailing slashes from a path string; return None if only slashes.""" + if isinstance(v, str): + stripped = v.strip("/") + if not stripped: + return None + return stripped + return v + + +type OptionalRelPath = Annotated[ + RelPath | None, + pydantic.BeforeValidator(_strip_slashes_or_none), +] + type OptionalRevisionNumber = Annotated[ RevisionNumber | None, pydantic.BeforeValidator(_empty_to_none), diff --git a/atr/post/announce.py b/atr/post/announce.py index d7f0b2be..4c625d91 100644 --- a/atr/post/announce.py +++ b/atr/post/announce.py @@ -64,7 +64,7 @@ async def selected( preview_revision_number = release.safe_latest_revision_number # Validate that the revision number matches - if announce_form.revision_number != str(preview_revision_number): + if announce_form.revision_number != preview_revision_number: return await session.redirect( get.announce.selected, error=f"The release has been updated since you loaded the form. " diff --git a/atr/post/draft.py b/atr/post/draft.py index 113ff157..3666679f 100644 --- a/atr/post/draft.py +++ b/atr/post/draft.py @@ -30,7 +30,6 @@ import atr.get as get import atr.log as log import atr.models.safe as safe import atr.models.sql as sql -import atr.models.unsafe as unsafe import atr.shared as shared import atr.storage as storage import atr.util as util @@ -121,20 +120,18 @@ async def delete_file( Delete a specific file from the release candidate, creating a new revision. """ rel_path_to_delete = delete_file_form.file_path - if rel_path_to_delete is None: - await quart.flash("No file path specified", "error") - return await session.redirect(get.compose.selected, project_key=str(project_key), version_key=str(version_key)) + path_to_delete = rel_path_to_delete.as_path() try: async with storage.write(session) as write: wacp = await write.as_project_committee_participant(project_key) - metadata_files_deleted = await wacp.release.delete_file(project_key, version_key, rel_path_to_delete) + metadata_files_deleted = await wacp.release.delete_file(project_key, version_key, path_to_delete) except Exception as e: log.exception("Error deleting file:") await quart.flash(f"Error deleting file: {e!s}", "error") return await session.redirect(get.compose.selected, project_key=str(project_key), version_key=str(version_key)) - success_message = f"File '{rel_path_to_delete.name}' deleted successfully" + success_message = f"File '{path_to_delete.name}' deleted successfully" if metadata_files_deleted: success_message += f", and {util.plural(metadata_files_deleted, 'associated metadata file')} deleted" return await session.redirect( @@ -148,17 +145,14 @@ async def hashgen( _draft_hashgen: Literal["draft/hashgen"], project_key: safe.ProjectKey, version_key: safe.VersionKey, - file_path: unsafe.Path, + file_path: safe.RelPath, empty_form: form.Empty, ) -> web.WerkzeugResponse: """ URL: /draft/hashgen/<project_key>/<version_key>/<file_path> Generate an sha512 hash file for a candidate draft file, creating a new revision. """ - rel_path = form.to_relpath(file_path) - if rel_path is None: - await quart.flash("Invalid file path", "error") - return await session.redirect(get.compose.selected, project_key=str(project_key), version_key=str(version_key)) + rel_path = file_path.as_path() try: async with storage.write(session) as write: @@ -242,23 +236,24 @@ async def sbomgen( _draft_sbomgen: Literal["draft/sbomgen"], project_key: safe.ProjectKey, version_key: safe.VersionKey, - file_path: unsafe.Path, + file_path: safe.RelPath, empty_form: form.Empty, ) -> web.WerkzeugResponse: """ URL: /draft/sbomgen/<project_key>/<version_key>/<file_path> Generate a CycloneDX SBOM file for a candidate draft file, creating a new revision. """ - path = str(file_path) - rel_path = form.to_relpath(path) - if rel_path is None: - await quart.flash("Invalid file path", "error") - return await session.redirect(get.compose.selected, project_key=str(project_key), version_key=str(version_key)) + rel_path = file_path.as_path() # Check that the file is a .tar.gz archive before creating a revision - if not (path.endswith(".tar.gz") or path.endswith(".tgz") or path.endswith(".zip") or path.endswith(".jar")): + if not ( + rel_path.name.endswith(".tar.gz") + or rel_path.name.endswith(".tgz") + or rel_path.name.endswith(".zip") + or rel_path.name.endswith(".jar") + ): raise base.ASFQuartException( - f"SBOM generation requires .tar.gz, .tgz, .zip or .jar files. Received: {path}", errorcode=400 + f"SBOM generation requires .tar.gz, .tgz, .zip or .jar files. Received: {file_path!s}", errorcode=400 ) try: diff --git a/atr/post/finish.py b/atr/post/finish.py index 8727c4c1..afabcc1f 100644 --- a/atr/post/finish.py +++ b/atr/post/finish.py @@ -59,12 +59,12 @@ async def _delete_empty_directory( respond: shared.finish.Respond, ) -> tuple[web.QuartResponse, int] | web.WerkzeugResponse: dir_to_delete_rel = delete_form.directory_to_delete - if dir_to_delete_rel is None: - return await respond(400, "No directory specified.") try: async with storage.write(session) as write: wacp = await write.as_project_committee_member(project_key) - creation_error = await wacp.release.delete_empty_directory(project_key, version_key, dir_to_delete_rel) + creation_error = await wacp.release.delete_empty_directory( + project_key, version_key, dir_to_delete_rel.as_path() + ) except Exception: log.exception(f"Unexpected error deleting directory {dir_to_delete_rel} for {project_key}/{version_key}") return await respond(500, "An unexpected error occurred.") @@ -83,8 +83,6 @@ async def _move_file_to_revision( ) -> tuple[web.QuartResponse, int] | web.WerkzeugResponse: source_files_rel = move_form.source_files target_dir_rel = move_form.target_directory - if target_dir_rel is None: - return await respond(400, "No target directory specified.") try: async with storage.write(session) as write: wacp = await write.as_project_committee_member(project_key) diff --git a/atr/post/sbom.py b/atr/post/sbom.py index ca0fde1f..bde31ba0 100644 --- a/atr/post/sbom.py +++ b/atr/post/sbom.py @@ -17,26 +17,21 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Literal +from typing import Literal import asfquart.base as base import quart import atr.blueprints.post as post import atr.db as db -import atr.form as form import atr.get as get import atr.log as log import atr.models.safe as safe -import atr.models.unsafe as unsafe import atr.shared as shared import atr.storage as storage import atr.util as util import atr.web as web -if TYPE_CHECKING: - import pathlib - @post.typed async def report( @@ -44,31 +39,28 @@ async def report( _sbom_report: Literal["sbom/report"], project_key: safe.ProjectKey, version_key: safe.VersionKey, - file_path: unsafe.Path, + file_path: safe.RelPath, sbom_form: shared.sbom.SBOMForm, ) -> web.WerkzeugResponse: """ URL: /sbom/report/<project_key>/<version_key>/<path:file_path> """ - validated_path = form.to_relpath(file_path) - if validated_path is None: - raise base.ASFQuartException("Invalid file path", errorcode=400) - match sbom_form: case shared.sbom.AugmentSBOMForm(): - return await _augment(session, project_key, version_key, validated_path) + return await _augment(session, project_key, version_key, file_path) case shared.sbom.ScanSBOMForm(): - return await _scan(session, project_key, version_key, validated_path) + return await _scan(session, project_key, version_key, file_path) async def _augment( - session: web.Committer, project_key: safe.ProjectKey, version_key: safe.VersionKey, rel_path: pathlib.Path + session: web.Committer, project_key: safe.ProjectKey, version_key: safe.VersionKey, rel_path: safe.RelPath ) -> web.WerkzeugResponse: """Augment a CycloneDX SBOM file.""" + path = rel_path.as_path() # Check that the file is a .cdx.json archive before creating a revision - if not (rel_path.name.endswith(".cdx.json")): + if not (path.name.endswith(".cdx.json")): raise base.ASFQuartException("SBOM augmentation is only supported for .cdx.json files", errorcode=400) try: @@ -79,13 +71,13 @@ async def _augment( revision_number = release.latest_revision_number if revision_number is None: raise RuntimeError("No revision number found for new revision creation") - log.info(f"Augmenting SBOM for {project_key} {version_key} {revision_number} {rel_path}") + log.info(f"Augmenting SBOM for {project_key} {version_key} {revision_number} {path}") async with storage.write_as_project_committee_member(project_key) as wacm: sbom_task = await wacm.sbom.augment_cyclonedx( project_key, version_key, revision_number, - rel_path, + path, ) except Exception as e: @@ -100,7 +92,7 @@ async def _augment( return await session.redirect( get.sbom.report, - success=f"SBOM augmentation task queued for {rel_path.name} (task ID: {util.unwrap(sbom_task.id)})", + success=f"SBOM augmentation task queued for {path.name} (task ID: {util.unwrap(sbom_task.id)})", project_key=project_key, version_key=version_key, file_path=str(rel_path), @@ -108,10 +100,11 @@ async def _augment( async def _scan( - session: web.Committer, project_key: safe.ProjectKey, version_key: safe.VersionKey, rel_path: pathlib.Path + session: web.Committer, project_key: safe.ProjectKey, version_key: safe.VersionKey, rel_path: safe.RelPath ) -> web.WerkzeugResponse: """Scan a CycloneDX SBOM file for vulnerabilities using OSV.""" - if not (rel_path.name.endswith(".cdx.json")): + path = rel_path.as_path() + if not (path.name.endswith(".cdx.json")): raise base.ASFQuartException("OSV scanning is only supported for .cdx.json files", errorcode=400) try: @@ -122,13 +115,13 @@ async def _scan( revision_number = release.latest_revision_number if revision_number is None: raise RuntimeError("No revision number found for OSV scan") - log.info(f"Starting OSV scan for {project_key} {version_key} {revision_number} {rel_path}") + log.info(f"Starting OSV scan for {project_key} {version_key} {revision_number} {path}") async with storage.write_as_project_committee_member(project_key) as wacm: sbom_task = await wacm.sbom.osv_scan_cyclonedx( project_key, version_key, revision_number, - rel_path, + path, ) except Exception as e: @@ -143,7 +136,7 @@ async def _scan( return await session.redirect( get.sbom.report, - success=f"OSV vulnerability scan queued for {rel_path.name} (task ID: {util.unwrap(sbom_task.id)})", + success=f"OSV vulnerability scan queued for {path.name} (task ID: {util.unwrap(sbom_task.id)})", project_key=project_key, version_key=version_key, file_path=str(rel_path), diff --git a/atr/post/upload.py b/atr/post/upload.py index 1da1d2b6..1b63e13e 100644 --- a/atr/post/upload.py +++ b/atr/post/upload.py @@ -232,10 +232,12 @@ async def _add_files( ) -def _construct_svn_url(committee_key: str, area: shared.upload.SvnArea, path: str, *, is_podling: bool) -> str: +def _construct_svn_url( + committee_key: str, area: shared.upload.SvnArea, path: safe.RelPath, *, is_podling: bool +) -> safe.RelPath: if is_podling: - return f"{area.value}/incubator/{committee_key}/{path}" - return f"{area.value}/{committee_key}/{path}" + return path.prepend(f"{area.value}/incubator/{committee_key}") + return path.prepend(f"{area.value}/{committee_key}") def _json_error(message: str, status: int) -> web.WerkzeugResponse: @@ -256,9 +258,9 @@ async def _svn_import( ) -> web.WerkzeugResponse: # audit_guidance any file uploads are from known and managed repositories so file size is not an issue try: - target_subdirectory = str(svn_form.target_subdirectory) if svn_form.target_subdirectory else None + target_subdirectory = svn_form.target_subdirectory if svn_form.target_subdirectory else None svn_area = shared.upload.SvnArea.DEV - svn_path = svn_form.svn_path or "" + svn_path = svn_form.svn_path async with db.session() as data: release = await session.release(project_key, version_key, data=data) diff --git a/atr/shared/announce.py b/atr/shared/announce.py index d565804b..e156cdbe 100644 --- a/atr/shared/announce.py +++ b/atr/shared/announce.py @@ -17,15 +17,14 @@ from typing import Literal -import pydantic - import atr.form as form +import atr.models.safe as safe class AnnounceForm(form.Form): """Form for announcing a release preview.""" - revision_number: str = form.label("Revision number", widget=form.Widget.HIDDEN) + revision_number: safe.RevisionNumber = form.label("Revision number", widget=form.Widget.HIDDEN) mailing_list: str = form.label( "Send vote email to", widget=form.Widget.CUSTOM, @@ -33,25 +32,8 @@ class AnnounceForm(form.Form): subject: str = form.label("Subject", widget=form.Widget.CUSTOM) subject_template_hash: str = form.label("Subject template hash", widget=form.Widget.HIDDEN) body: str = form.label("Body", widget=form.Widget.CUSTOM) - download_path_suffix: str = form.label("Download path suffix", widget=form.Widget.CUSTOM) + download_path_suffix: safe.OptionalRelPath = form.label("Download path suffix", widget=form.Widget.CUSTOM) confirm_announce: Literal["CONFIRM"] = form.label( "Confirm", "Type CONFIRM (in capitals) to enable the submit button.", ) - - @pydantic.field_validator("download_path_suffix") - @classmethod - def validate_and_normalize_download_path_suffix(cls, suffix: str) -> str: - if (".." in suffix) or ("//" in suffix): - raise ValueError("Download path suffix must not contain .. or //") - if suffix.startswith("./"): - suffix = suffix[1:] - elif suffix == ".": - suffix = "/" - if not suffix.startswith("/"): - suffix = "/" + suffix - if not suffix.endswith("/"): - suffix = suffix + "/" - if "/." in suffix: - raise ValueError("Download path suffix must not contain /.") - return suffix diff --git a/atr/shared/draft.py b/atr/shared/draft.py index 4be74573..896660ec 100644 --- a/atr/shared/draft.py +++ b/atr/shared/draft.py @@ -16,6 +16,7 @@ # under the License. import atr.form as form +import atr.models.safe as safe class ClearQuarantineForm(form.Form): @@ -23,7 +24,7 @@ class ClearQuarantineForm(form.Form): class DeleteFileForm(form.Form): - file_path: form.RelPath = form.label("File path", widget=form.Widget.HIDDEN) + file_path: safe.RelPath = form.label("File path", widget=form.Widget.HIDDEN) class HashGen(form.Empty): diff --git a/atr/shared/finish.py b/atr/shared/finish.py index 4f5501a4..1a7f835f 100644 --- a/atr/shared/finish.py +++ b/atr/shared/finish.py @@ -21,6 +21,7 @@ from typing import Annotated, Literal import pydantic import atr.form as form +import atr.models.safe as safe import atr.web as web type Respond = Callable[[int, str], Awaitable[tuple[web.QuartResponse, int] | web.WerkzeugResponse]] @@ -32,25 +33,24 @@ type REMOVE_RC_TAGS = Literal["REMOVE_RC_TAGS"] class DeleteEmptyDirectoryForm(form.Form): variant: DELETE_DIR = form.value(DELETE_DIR) - directory_to_delete: form.RelPath = form.label("Directory to delete", widget=form.Widget.SELECT) + directory_to_delete: safe.RelPath = form.label("Directory to delete", widget=form.Widget.SELECT) class MoveFileForm(form.Form): variant: MOVE_FILE = form.value(MOVE_FILE) source_files: form.RelPathList = form.label("Files to move") - target_directory: form.RelPath = form.label("Target directory") + target_directory: safe.RelPath = form.label("Target directory") @pydantic.model_validator(mode="after") def validate_move(self) -> "MoveFileForm": if not self.source_files: raise ValueError("Please select at least one file to move.") - if self.target_directory is None: - raise ValueError("Target directory is required.") - + target_dir_path = self.target_directory.as_path() for source_path in self.source_files: - if source_path.parent == self.target_directory: - raise ValueError(f"Target directory cannot be the same as the source directory for {source_path.name}.") + source = source_path.as_path() + if source.parent == target_dir_path: + raise ValueError(f"Target directory cannot be the same as the source directory for {source.name}.") return self diff --git a/atr/shared/upload.py b/atr/shared/upload.py index dc24892a..99a88a28 100644 --- a/atr/shared/upload.py +++ b/atr/shared/upload.py @@ -19,6 +19,7 @@ import enum from typing import Annotated, Literal import atr.form as form +import atr.models.safe as safe type ADD_FILES = Literal["add_files"] type SVN_IMPORT = Literal["svn_import"] @@ -41,7 +42,7 @@ class SvnImportForm(form.Form): # "Select whether to import from dev or release.", # widget=form.Widget.RADIO, # ) - svn_path: form.URLPath = form.label( + svn_path: safe.RelPath = form.label( "SVN path", "Path within the committee's svn:dist directory, e.g. 'java-library/4_0_4' or '3.1.5rc1'.", ) @@ -50,7 +51,7 @@ class SvnImportForm(form.Form): "Specify an SVN revision number or leave as HEAD for the latest.", default="HEAD", ) - target_subdirectory: form.Filename = form.label( + target_subdirectory: safe.OptionalRelPath = form.label( "Target subdirectory", "Optional: Subdirectory to place imported files, defaulting to the root.", ) diff --git a/atr/storage/writers/announce.py b/atr/storage/writers/announce.py index d4cd9909..c1390220 100644 --- a/atr/storage/writers/announce.py +++ b/atr/storage/writers/announce.py @@ -109,7 +109,7 @@ class CommitteeMember(CommitteeParticipant): preview_revision_number: safe.RevisionNumber, recipient: str, body: str, - download_path_suffix: str, + download_path_suffix: safe.RelPath | None, asf_uid: str, fullname: str, subject_template_hash: str | None = None, @@ -254,14 +254,16 @@ class CommitteeMember(CommitteeParticipant): self, committee: sql.Committee, unfinished_path: pathlib.Path, - download_path_suffix: str, + download_path_suffix: safe.RelPath | None, dry_run: bool = False, preserve: bool = False, ) -> None: """Hard link the release files to the downloads directory.""" # TODO: Rename *_dir functions to _path functions downloads_base_path = paths.get_downloads_dir() - downloads_path = downloads_base_path / committee.key / download_path_suffix.removeprefix("/") + downloads_path = downloads_base_path / committee.key + if download_path_suffix is not None: + downloads_path = downloads_path / download_path_suffix.as_path() # The "exist_ok" parameter means to overwrite files if True # We only overwrite if we're not preserving, so we supply "not preserve" # TODO: Add a test for this diff --git a/atr/storage/writers/release.py b/atr/storage/writers/release.py index 0f5da9ba..facdd57e 100644 --- a/atr/storage/writers/release.py +++ b/atr/storage/writers/release.py @@ -32,7 +32,6 @@ import sqlmodel import atr.analysis as analysis import atr.config as config import atr.db as db -import atr.form as form import atr.log as log import atr.models.api as api import atr.models.safe as safe @@ -274,14 +273,14 @@ class CommitteeParticipant(FoundationCommitter): self, project_key: safe.ProjectKey, version_key: safe.VersionKey, - svn_url: str, - revision: str, - target_subdirectory: str | None, + svn_url: safe.RelPath, + svn_revision: str, + target_subdirectory: safe.RelPath | None, ) -> sql.Task: task_args = { - "svn_url": svn_url, - "revision": revision, - "target_subdirectory": target_subdirectory, + "svn_url": str(svn_url), + "revision": svn_revision, + "target_subdirectory": str(target_subdirectory) if target_subdirectory else None, "project_key": str(project_key), "version_key": str(version_key), "asf_uid": self.__asf_uid, @@ -304,8 +303,8 @@ class CommitteeParticipant(FoundationCommitter): self, project_key: safe.ProjectKey, version_key: safe.VersionKey, - source_files_rel: list[pathlib.Path], - target_dir_rel: pathlib.Path, + source_files_rel: list[safe.RelPath], + target_dir_rel: safe.RelPath, ) -> tuple[str | None, list[str], list[str]]: description = "File move through web interface" moved_files_names: list[str] = [] @@ -486,9 +485,7 @@ class CommitteeParticipant(FoundationCommitter): async def upload_file(self, args: api.ReleaseUploadArgs) -> sql.Revision | sql.Quarantined: file_bytes = base64.b64decode(args.content, validate=True) - validated_path = form.to_relpath(args.relpath) - if validated_path is None: - raise storage.AccessError("Invalid file path") + validated_path = args.relpath.as_path() description = f"Upload via API: {validated_path}" async def modify(path: pathlib.Path, _old_rev: sql.Revision | None) -> None: @@ -525,11 +522,8 @@ class CommitteeParticipant(FoundationCommitter): for file in files: if not file.filename: raise storage.AccessError("No filename provided") - # Validate the filename from multipart upload - validated_path = form.to_relpath(file.filename) - if validated_path is None: - raise storage.AccessError("Invalid filename") - target_path = path / validated_path + # Validate the filename from multipart upload and construct the new path + target_path = path / str(safe.RelPath(file.filename)) await aiofiles.os.makedirs(target_path.parent, exist_ok=True) await self.__save_file(file, target_path) @@ -687,13 +681,13 @@ class CommitteeParticipant(FoundationCommitter): async def __setup_revision( self, - source_files_rel: list[pathlib.Path], - target_dir_rel: pathlib.Path, + source_files_rel: list[safe.RelPath], + target_dir_rel: safe.RelPath, interim_path: pathlib.Path, moved_files_names: list[str], skipped_files_names: list[str], ) -> None: - target_path = interim_path / target_dir_rel + target_path = interim_path / target_dir_rel.as_path() try: resolved = await asyncio.to_thread(target_path.resolve) resolved.relative_to(await asyncio.to_thread(interim_path.resolve)) @@ -704,6 +698,7 @@ class CommitteeParticipant(FoundationCommitter): if not await aiofiles.os.path.exists(target_path): for part in target_path.parts: # TODO: This .prefix check could include some existing directory segment + # TODO: The second check probably isn't needed now since this came from a safe RelPath type. if util.is_disallowed_dotfile(part): raise types.FailedError("This segment is a disallowed dotfile") if ".." in part: @@ -723,33 +718,35 @@ class CommitteeParticipant(FoundationCommitter): async def __setup_revision_item( self, - source_file_rel: pathlib.Path, - target_dir_rel: pathlib.Path, + source_file_rel: safe.RelPath, + target_dir_rel: safe.RelPath, interim_path: pathlib.Path, moved_files_names: list[str], skipped_files_names: list[str], target_path: pathlib.Path, ) -> None: - if source_file_rel.parent == target_dir_rel: - skipped_files_names.append(source_file_rel.name) + source_path = source_file_rel.as_path() + target_dir_path = target_dir_rel.as_path() + if source_path.parent == target_dir_path: + skipped_files_names.append(source_path.name) return - full_source_item_path = interim_path / source_file_rel + full_source_item_path = interim_path / source_path if await aiofiles.os.path.isdir(full_source_item_path): - if (target_dir_rel == source_file_rel) or (interim_path / target_dir_rel).resolve().is_relative_to( + if (target_dir_rel == source_file_rel) or (interim_path / target_dir_path).resolve().is_relative_to( full_source_item_path.resolve() ): raise types.FailedError("Cannot move a directory into itself or a subdirectory of itself") - final_target_for_item = target_path / source_file_rel.name + final_target_for_item = target_path / source_path.name if await aiofiles.os.path.exists(final_target_for_item): raise types.FailedError("Target name already exists") await aiofiles.os.rename(full_source_item_path, final_target_for_item) - moved_files_names.append(source_file_rel.name) + moved_files_names.append(source_path.name) else: - related_files = self.__related_files(source_file_rel) + related_files = self.__related_files(source_path) bundle = [f for f in related_files if await aiofiles.os.path.exists(interim_path / f)] for f_check in bundle: if await aiofiles.os.path.isdir(interim_path / f_check): @@ -761,7 +758,7 @@ class CommitteeParticipant(FoundationCommitter): for f in bundle: await aiofiles.os.rename(interim_path / f, target_path / f.name) - if f == source_file_rel: + if f == source_path: moved_files_names.append(f.name) async def __tasks_ongoing( diff --git a/atr/tasks/gha.py b/atr/tasks/gha.py index fd18f723..f3d32796 100644 --- a/atr/tasks/gha.py +++ b/atr/tasks/gha.py @@ -35,7 +35,6 @@ import atr.storage as storage import atr.tasks as tasks import atr.tasks.checks as checks import atr.util as util -from atr.models.results import DistributionWorkflowStatus _BASE_URL: Final[str] = "https://api.github.com/repos" _IN_PROGRESS_STATUSES: Final[list[str]] = ["in_progress", "queued", "requested", "waiting", "pending", "expected"] @@ -68,7 +67,7 @@ class WorkflowStatusCheck(schema.Strict): @checks.with_model(WorkflowStatusCheck) -async def status_check(args: WorkflowStatusCheck) -> DistributionWorkflowStatus: +async def status_check(args: WorkflowStatusCheck) -> results.DistributionWorkflowStatus: """Check remote workflow statuses.""" headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {config.get().GITHUB_TOKEN}"} diff --git a/atr/tasks/svn.py b/atr/tasks/svn.py index e7a99f62..476a40f3 100644 --- a/atr/tasks/svn.py +++ b/atr/tasks/svn.py @@ -75,6 +75,7 @@ async def _import_files_core(args: SvnImport) -> str: project = safe.ProjectKey(args.project_key) version = safe.VersionKey(args.version_key) + svn_path = safe.RelPath(args.svn_url) log.info(f"Starting SVN import for {args.project_key}-{args.version_key}") # We have to use a temporary directory otherwise SVN thinks it's a pegged revision @@ -109,7 +110,7 @@ async def _import_files_core(args: SvnImport) -> str: "-r", args.revision, "--", - f"{_SVN_BASE_URL}/{args.svn_url}", + f"{_SVN_BASE_URL}/{svn_path!s}", str(temp_export_path), ] diff --git a/atr/util.py b/atr/util.py index eed4feae..166240dd 100644 --- a/atr/util.py +++ b/atr/util.py @@ -1167,31 +1167,6 @@ def validate_path_segment(path_segment: str, position: str = "Path segment") -> return path_segment -def validate_path_str(path_str: str) -> str: - # The pathlib module normalises empty components - # Therefore, we must do this check on the path string - if "//" in path_str: - raise ValueError("Path cannot contain //") - - for segment in pathlib.Path(path_str).parts: - validate_path_segment(segment) - return path_str - - -def validate_relative_path_str(path_str: str) -> str: - # Check for absolute paths using both POSIX and Windows semantics - # We don't support Windows paths, but we want to detect all bad inputs - # PurePosixPath doesn't recognise Windows drive letters as absolute - # PureWindowsPath treats leading "/" differently - posix_path = pathlib.PurePosixPath(path_str) - windows_path = pathlib.PureWindowsPath(path_str) - if posix_path.is_absolute() or windows_path.is_absolute(): - raise ValueError("Absolute paths are not allowed") - - validate_path_str(path_str) - return path_str - - # TODO: AM put these rules into safe.versionkey def version_key_error(version_key: str) -> str | None: """Check if the given version name is valid.""" --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
