This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/sbp by this push:
new 08edc8a Improve the validation of filenames, paths, and relative paths
08edc8a is described below
commit 08edc8acc9b578809367f1a7573ea6dce215c99d
Author: Sean B. Palmer <[email protected]>
AuthorDate: Wed Feb 11 11:54:35 2026 +0000
Improve the validation of filenames, paths, and relative paths
---
atr/form.py | 60 ++++++-------------------------------------------------
atr/get/finish.py | 13 +++++-------
atr/util.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 70 insertions(+), 62 deletions(-)
diff --git a/atr/form.py b/atr/form.py
index 909b0e6..bff9406 100644
--- a/atr/form.py
+++ b/atr/form.py
@@ -22,7 +22,6 @@ import json
import pathlib
import re
import types
-import unicodedata
from typing import TYPE_CHECKING, Annotated, Any, Final, Literal, TypeAliasType, get_args, get_origin
import htpy
@@ -418,23 +417,9 @@ def to_filename(v: Any) -> pathlib.Path | None:
if not v:
return None
- name = str(v).strip()
-
- if not name:
- raise ValueError("Filename cannot be empty")
-
- if "\0" in name:
- raise ValueError("Filename cannot contain null bytes")
-
- name = unicodedata.normalize("NFC", name)
-
- if ("/" in name) or ("\\" in name):
- raise ValueError("Filename cannot contain path separators")
-
- if name in (".", ".."):
- raise ValueError("Invalid filename")
-
- return pathlib.Path(name)
+ filename = str(v).strip()
+ filename = util.validate_filename(filename)
+ return pathlib.Path(filename)
def to_int(v: Any) -> int:
@@ -461,8 +446,8 @@ def to_relpath(v: Any) -> pathlib.Path | None:
if not path_str:
raise ValueError("Path cannot be empty")
- validated = _validate_relpath_string(path_str)
- return pathlib.Path(validated)
+ util.validate_relative_path_str(path_str)
+ return pathlib.Path(path_str)
def to_relpath_list(v: Any) -> list[pathlib.Path]:
@@ -500,8 +485,7 @@ def to_url_path(v: Any) -> str | None:
if not path_str:
raise ValueError("Path cannot be empty")
- validated = _validate_relpath_string(path_str)
- return str(validated)
+ return util.validate_relative_path_str(path_str)
# Validator types come before other functions
@@ -1071,35 +1055,3 @@ def _render_widget( # noqa: C901
elements.append(error_div)
return htm.div[elements] if (len(elements) > 1) else elements[0]
-
-
-def _validate_relpath_string(path_str: str) -> pathlib.PurePosixPath:
- if "\0" in path_str:
- raise ValueError("Path cannot contain null bytes")
-
- path_str = unicodedata.normalize("NFC", path_str)
-
- if "\\" in path_str:
- raise ValueError("Path cannot contain backslashes")
-
- # PurePosixPath normalises empty components
- # Therefore, we must do this check on the path string
- if "//" in path_str:
- raise ValueError("Path cannot contain //")
-
- # Check for absolute paths using both POSIX and Windows semantics
- # We don't support Windows paths, but we want to detect all bad inputs
- # PurePosixPath doesn't recognise Windows drive letters as absolute
- # PureWindowsPath treats leading "/" differently
- posix_path = pathlib.PurePosixPath(path_str)
- windows_path = pathlib.PureWindowsPath(path_str)
- if posix_path.is_absolute() or windows_path.is_absolute():
- raise ValueError("Absolute paths are not allowed")
-
- for part in posix_path.parts:
- if part == "..":
- raise ValueError("Parent directory references (..) are not allowed")
- if part == ".":
- raise ValueError("Self directory references (.) are not allowed")
-
- return posix_path
diff --git a/atr/get/finish.py b/atr/get/finish.py
index a209e18..61fabc7 100644
--- a/atr/get/finish.py
+++ b/atr/get/finish.py
@@ -436,16 +436,13 @@ async def _render_page(
# JavaScript data
# TODO: Add htm.script
csrf_token = utils.generate_csrf()
+ # Should be already validated, but check again
+ safe_source_files_rel = [util.validate_path(f).as_posix() for f in sorted(source_files_rel)]
+ safe_target_dirs = [util.validate_path(d).as_posix() for d in sorted(target_dirs)]
page.append(
- htpy.script(id="file-data", type="application/json")[
- markupsafe.Markup(json.dumps([str(f) for f in sorted(source_files_rel)]))
- ]
- )
- page.append(
- htpy.script(id="dir-data", type="application/json")[
- markupsafe.Markup(json.dumps(sorted([str(d) for d in target_dirs])))
- ]
+ htpy.script(id="file-data", type="application/json")[markupsafe.escape(json.dumps(safe_source_files_rel))]
)
+ page.append(htpy.script(id="dir-data", type="application/json")[markupsafe.escape(json.dumps(safe_target_dirs))])
page.append(
htpy.script(
id="main-script-data",
diff --git a/atr/util.py b/atr/util.py
index 13d9480..2428a94 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -29,6 +29,7 @@ import re
import ssl
import tarfile
import tempfile
+import unicodedata
import uuid
import zipfile
from collections.abc import AsyncGenerator, Callable, Iterable, Sequence
@@ -1186,6 +1187,64 @@ def validate_as_type[T](value: Any, t: type[T]) -> T:
return value
+def validate_filename(filename: str) -> str:
+ return validate_path_segment(filename, "Filename")
+
+
+def validate_path(path: pathlib.Path) -> pathlib.Path:
+ for segment in path.parts:
+ validate_path_segment(segment)
+ return path
+
+
+def validate_path_segment(path_segment: str, position: str = "Path segment") -> str:
+ if not path_segment:
+ raise ValueError(f"{position} cannot be empty")
+
+ if "\0" in path_segment:
+ raise ValueError(f"{position} cannot contain null bytes")
+
+ if path_segment != unicodedata.normalize("NFC", path_segment):
+ raise ValueError(f"{position} must be in Unicode Normalization Form C (NFC)")
+
+ # TODO: Check relevant constants too?
+ if ("/" in path_segment) or ("\\" in path_segment):
+ raise ValueError(f"{position} cannot contain path separators")
+
+ if ("<" in path_segment) or (">" in path_segment) or ("&" in path_segment):
+ raise ValueError(f"{position} cannot contain markup characters")
+
+ if path_segment in (".", ".."):
+ raise ValueError(f"{position} cannot be a directory traversal")
+
+ return path_segment
+
+
+def validate_path_str(path_str: str) -> str:
+ # The pathlib module normalises empty components
+ # Therefore, we must do this check on the path string
+ if "//" in path_str:
+ raise ValueError("Path cannot contain //")
+
+ for segment in pathlib.Path(path_str).parts:
+ validate_path_segment(segment)
+ return path_str
+
+
+def validate_relative_path_str(path_str: str) -> str:
+ # Check for absolute paths using both POSIX and Windows semantics
+ # We don't support Windows paths, but we want to detect all bad inputs
+ # PurePosixPath doesn't recognise Windows drive letters as absolute
+ # PureWindowsPath treats leading "/" differently
+ posix_path = pathlib.PurePosixPath(path_str)
+ windows_path = pathlib.PureWindowsPath(path_str)
+ if posix_path.is_absolute() or windows_path.is_absolute():
+ raise ValueError("Absolute paths are not allowed")
+
+ validate_path_str(path_str)
+ return path_str
+
+
def version_name_error(version_name: str) -> str | None:
"""Check if the given version name is valid."""
if version_name == "":
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]