This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/sbp by this push:
     new 08edc8a  Improve the validation of filenames, paths, and relative paths
08edc8a is described below

commit 08edc8acc9b578809367f1a7573ea6dce215c99d
Author: Sean B. Palmer <[email protected]>
AuthorDate: Wed Feb 11 11:54:35 2026 +0000

    Improve the validation of filenames, paths, and relative paths
---
 atr/form.py       | 60 ++++++-------------------------------------------------
 atr/get/finish.py | 13 +++++-------
 atr/util.py       | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 62 deletions(-)

diff --git a/atr/form.py b/atr/form.py
index 909b0e6..bff9406 100644
--- a/atr/form.py
+++ b/atr/form.py
@@ -22,7 +22,6 @@ import json
 import pathlib
 import re
 import types
-import unicodedata
 from typing import TYPE_CHECKING, Annotated, Any, Final, Literal, TypeAliasType, get_args, get_origin
 
 import htpy
@@ -418,23 +417,9 @@ def to_filename(v: Any) -> pathlib.Path | None:
     if not v:
         return None
 
-    name = str(v).strip()
-
-    if not name:
-        raise ValueError("Filename cannot be empty")
-
-    if "\0" in name:
-        raise ValueError("Filename cannot contain null bytes")
-
-    name = unicodedata.normalize("NFC", name)
-
-    if ("/" in name) or ("\\" in name):
-        raise ValueError("Filename cannot contain path separators")
-
-    if name in (".", ".."):
-        raise ValueError("Invalid filename")
-
-    return pathlib.Path(name)
+    filename = str(v).strip()
+    filename = util.validate_filename(filename)
+    return pathlib.Path(filename)
 
 
 def to_int(v: Any) -> int:
@@ -461,8 +446,8 @@ def to_relpath(v: Any) -> pathlib.Path | None:
     if not path_str:
         raise ValueError("Path cannot be empty")
 
-    validated = _validate_relpath_string(path_str)
-    return pathlib.Path(validated)
+    util.validate_relative_path_str(path_str)
+    return pathlib.Path(path_str)
 
 
 def to_relpath_list(v: Any) -> list[pathlib.Path]:
@@ -500,8 +485,7 @@ def to_url_path(v: Any) -> str | None:
     if not path_str:
         raise ValueError("Path cannot be empty")
 
-    validated = _validate_relpath_string(path_str)
-    return str(validated)
+    return util.validate_relative_path_str(path_str)
 
 
 # Validator types come before other functions
@@ -1071,35 +1055,3 @@ def _render_widget(  # noqa: C901
         elements.append(error_div)
 
     return htm.div[elements] if (len(elements) > 1) else elements[0]
-
-
-def _validate_relpath_string(path_str: str) -> pathlib.PurePosixPath:
-    if "\0" in path_str:
-        raise ValueError("Path cannot contain null bytes")
-
-    path_str = unicodedata.normalize("NFC", path_str)
-
-    if "\\" in path_str:
-        raise ValueError("Path cannot contain backslashes")
-
-    # PurePosixPath normalises empty components
-    # Therefore, we must do this check on the path string
-    if "//" in path_str:
-        raise ValueError("Path cannot contain //")
-
-    # Check for absolute paths using both POSIX and Windows semantics
-    # We don't support Windows paths, but we want to detect all bad inputs
-    # PurePosixPath doesn't recognise Windows drive letters as absolute
-    # PureWindowsPath treats leading "/" differently
-    posix_path = pathlib.PurePosixPath(path_str)
-    windows_path = pathlib.PureWindowsPath(path_str)
-    if posix_path.is_absolute() or windows_path.is_absolute():
-        raise ValueError("Absolute paths are not allowed")
-
-    for part in posix_path.parts:
-        if part == "..":
-            raise ValueError("Parent directory references (..) are not allowed")
-        if part == ".":
-            raise ValueError("Self directory references (.) are not allowed")
-
-    return posix_path
diff --git a/atr/get/finish.py b/atr/get/finish.py
index a209e18..61fabc7 100644
--- a/atr/get/finish.py
+++ b/atr/get/finish.py
@@ -436,16 +436,13 @@ async def _render_page(
     # JavaScript data
     # TODO: Add htm.script
     csrf_token = utils.generate_csrf()
+    # Should be already validated, but check again
+    safe_source_files_rel = [util.validate_path(f).as_posix() for f in sorted(source_files_rel)]
+    safe_target_dirs = [util.validate_path(d).as_posix() for d in sorted(target_dirs)]
     page.append(
-        htpy.script(id="file-data", type="application/json")[
-            markupsafe.Markup(json.dumps([str(f) for f in sorted(source_files_rel)]))
-        ]
-    )
-    page.append(
-        htpy.script(id="dir-data", type="application/json")[
-            markupsafe.Markup(json.dumps(sorted([str(d) for d in target_dirs])))
-        ]
+        htpy.script(id="file-data", type="application/json")[markupsafe.escape(json.dumps(safe_source_files_rel))]
     )
+    page.append(htpy.script(id="dir-data", type="application/json")[markupsafe.escape(json.dumps(safe_target_dirs))])
     page.append(
         htpy.script(
             id="main-script-data",
diff --git a/atr/util.py b/atr/util.py
index 13d9480..2428a94 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -29,6 +29,7 @@ import re
 import ssl
 import tarfile
 import tempfile
+import unicodedata
 import uuid
 import zipfile
 from collections.abc import AsyncGenerator, Callable, Iterable, Sequence
@@ -1186,6 +1187,64 @@ def validate_as_type[T](value: Any, t: type[T]) -> T:
     return value
 
 
+def validate_filename(filename: str) -> str:
+    return validate_path_segment(filename, "Filename")
+
+
+def validate_path(path: pathlib.Path) -> pathlib.Path:
+    for segment in path.parts:
+        validate_path_segment(segment)
+    return path
+
+
+def validate_path_segment(path_segment: str, position: str = "Path segment") -> str:
+    if not path_segment:
+        raise ValueError(f"{position} cannot be empty")
+
+    if "\0" in path_segment:
+        raise ValueError(f"{position} cannot contain null bytes")
+
+    if path_segment != unicodedata.normalize("NFC", path_segment):
+        raise ValueError(f"{position} must be in Unicode Normalization Form C (NFC)")
+
+    # TODO: Check relevant constants too?
+    if ("/" in path_segment) or ("\\" in path_segment):
+        raise ValueError(f"{position} cannot contain path separators")
+
+    if ("<" in path_segment) or (">" in path_segment) or ("&" in path_segment):
+        raise ValueError(f"{position} cannot contain markup characters")
+
+    if path_segment in (".", ".."):
+        raise ValueError(f"{position} cannot be a directory traversal")
+
+    return path_segment
+
+
+def validate_path_str(path_str: str) -> str:
+    # The pathlib module normalises empty components
+    # Therefore, we must do this check on the path string
+    if "//" in path_str:
+        raise ValueError("Path cannot contain //")
+
+    for segment in pathlib.Path(path_str).parts:
+        validate_path_segment(segment)
+    return path_str
+
+
+def validate_relative_path_str(path_str: str) -> str:
+    # Check for absolute paths using both POSIX and Windows semantics
+    # We don't support Windows paths, but we want to detect all bad inputs
+    # PurePosixPath doesn't recognise Windows drive letters as absolute
+    # PureWindowsPath treats leading "/" differently
+    posix_path = pathlib.PurePosixPath(path_str)
+    windows_path = pathlib.PureWindowsPath(path_str)
+    if posix_path.is_absolute() or windows_path.is_absolute():
+        raise ValueError("Absolute paths are not allowed")
+
+    validate_path_str(path_str)
+    return path_str
+
+
 def version_name_error(version_name: str) -> str | None:
     """Check if the given version name is valid."""
     if version_name == "":


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to