This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 53b5b42f8f0 Escape LIKE wildcards in non-search filter parameters (#67496)
53b5b42f8f0 is described below

commit 53b5b42f8f09f7f96d3c26a640ecd0a979ba3540
Author: Jarek Potiuk <[email protected]>
AuthorDate: Tue May 26 00:09:43 2026 +0200

    Escape LIKE wildcards in non-search filter parameters (#67496)
    
    `_OwnersFilter`, `_AssetDependencyFilter`, and `_ConsumingAssetFilter`
    embedded user-supplied values directly into `ILIKE '%...%'` patterns
    without escaping the SQL wildcard metacharacters `%` and `_`. These
    filter classes are not documented as supporting wildcard semantics
    (unlike `_SearchParam`), so a user supplying `%` or `_` would trigger
    pattern matching rather than the literal substring matching the filter
    promises — widening match results beyond the filter's intent.
    
    This is not SQL injection (SQLAlchemy parameterizes the values) and RBAC
    still bounds results, but the leak of pattern semantics is a
    defense-in-depth issue worth closing.
    
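    Add `_escape_like_pattern()` helper that escapes `\`, `%`, and `_`, and
    apply it in the three affected filters along with an explicit
    `escape="\\"` clause on the `.ilike()` call. `_SearchParam` is left
    unchanged — its docstring explicitly documents wildcard support.
    
    Note that the `to_orm()` guards in `_AssetDependencyFilter` and
    `_ConsumingAssetFilter` no longer consult `skip_none`: they now return the
    unmodified select whenever the value is `None` (or otherwise falsy, for
    `_ConsumingAssetFilter`). This also keeps `None` from reaching the
    helper, which calls `str.replace()` on its argument.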
---
 .../src/airflow/api_fastapi/common/parameters.py   | 42 ++++++++++++--
 .../unit/api_fastapi/common/test_parameters.py     | 65 ++++++++++++++++++++++
 2 files changed, 101 insertions(+), 6 deletions(-)

diff --git a/airflow-core/src/airflow/api_fastapi/common/parameters.py 
b/airflow-core/src/airflow/api_fastapi/common/parameters.py
index f478da5b898..56b4c20884c 100644
--- a/airflow-core/src/airflow/api_fastapi/common/parameters.py
+++ b/airflow-core/src/airflow/api_fastapi/common/parameters.py
@@ -284,6 +284,26 @@ class _PrefixPatternParam(BaseParam[str], ABC):
         return value
 
 
+_LIKE_ESCAPE_CHAR = "\\"
+
+
+def _escape_like_pattern(value: str) -> str:
+    r"""
+    Escape SQL ``LIKE`` / ``ILIKE`` metacharacters in a user-supplied value.
+
+    Use together with ``column.ilike(f"%{_escape_like_pattern(value)}%", 
escape="\\")`` on filter
+    parameters that intend literal substring matching (so a user-supplied 
``%`` or ``_`` does not
+    widen the match beyond what the filter semantics promise). Search 
parameters that explicitly
+    expose wildcard semantics (see :class:`_SearchParam`) must not call this — 
they want the
+    metacharacters to pass through.
+    """
+    return (
+        value.replace(_LIKE_ESCAPE_CHAR, _LIKE_ESCAPE_CHAR * 2)
+        .replace("%", _LIKE_ESCAPE_CHAR + "%")
+        .replace("_", _LIKE_ESCAPE_CHAR + "_")
+    )
+
+
 class _SearchParam(BaseParam[str]):
     """
     Substring search on a column using ``ILIKE '%term%'`` (case-insensitive).
@@ -822,7 +842,10 @@ class _OwnersFilter(BaseParam[list[str]]):
         if not self.value:
             return select
 
-        conditions = [DagModel.owners.ilike(f"%{owner}%") for owner in 
self.value]
+        conditions = [
+            DagModel.owners.ilike(f"%{_escape_like_pattern(owner)}%", 
escape=_LIKE_ESCAPE_CHAR)
+            for owner in self.value
+        ]
         return select.where(or_(*conditions))
 
     @classmethod
@@ -1108,13 +1131,19 @@ class _AssetDependencyFilter(BaseParam[str]):
     """Filter Dags by specific asset dependencies."""
 
     def to_orm(self, select: Select) -> Select:
-        if self.value is None and self.skip_none:
+        if self.value is None:
             return select
 
+        escaped = _escape_like_pattern(self.value)
         asset_dag_subquery = (
             sql_select(DagScheduleAssetReference.dag_id)
             .join(AssetModel, DagScheduleAssetReference.asset_id == 
AssetModel.id)
-            .where(or_(AssetModel.name.ilike(f"%{self.value}%"), 
AssetModel.uri.ilike(f"%{self.value}%")))
+            .where(
+                or_(
+                    AssetModel.name.ilike(f"%{escaped}%", 
escape=_LIKE_ESCAPE_CHAR),
+                    AssetModel.uri.ilike(f"%{escaped}%", 
escape=_LIKE_ESCAPE_CHAR),
+                )
+            )
             .distinct()
         )
 
@@ -1138,16 +1167,17 @@ class _ConsumingAssetFilter(BaseParam[str | None]):
     """Filter Dag runs by consuming asset (name or URI)."""
 
     def to_orm(self, select: Select) -> Select:
-        if not self.value and self.skip_none:
+        if not self.value:
             return select
 
+        escaped = _escape_like_pattern(self.value)
         event_subquery = (
             sql_select(AssetEvent.id)
             .join(AssetModel, AssetEvent.asset_id == AssetModel.id)
             .where(
                 or_(
-                    AssetModel.name.ilike(f"%{self.value}%"),
-                    AssetModel.uri.ilike(f"%{self.value}%"),
+                    AssetModel.name.ilike(f"%{escaped}%", 
escape=_LIKE_ESCAPE_CHAR),
+                    AssetModel.uri.ilike(f"%{escaped}%", 
escape=_LIKE_ESCAPE_CHAR),
                 )
             )
             .distinct()
diff --git a/airflow-core/tests/unit/api_fastapi/common/test_parameters.py 
b/airflow-core/tests/unit/api_fastapi/common/test_parameters.py
index 6d5014b4cec..e2742d88a94 100644
--- a/airflow-core/tests/unit/api_fastapi/common/test_parameters.py
+++ b/airflow-core/tests/unit/api_fastapi/common/test_parameters.py
@@ -31,6 +31,10 @@ from airflow.api_fastapi.common.parameters import (
     NullableDatetimeRangeFilter,
     RangeFilter,
     SortParam,
+    _AssetDependencyFilter,
+    _ConsumingAssetFilter,
+    _escape_like_pattern,
+    _OwnersFilter,
     _PrefixPatternParam,
     _PrefixSearchParam,
     _SearchParam,
@@ -220,6 +224,67 @@ class TestSearchParam:
         assert " or " not in sql
 
 
+class TestEscapeLikePattern:
+    """The escape helper turns user input into a literal substring pattern.
+
+    Filter parameters that do *not* document wildcard semantics must call this 
so a user-supplied
+    ``%`` or ``_`` does not widen the match beyond the filter's intent. Search 
parameters that
+    explicitly expose wildcard semantics (see ``_SearchParam``) deliberately 
do not call it.
+    """
+
+    @pytest.mark.parametrize(
+        ("raw", "expected"),
+        [
+            ("plain", "plain"),
+            ("a%b", r"a\%b"),
+            ("a_b", r"a\_b"),
+            (r"a\b", r"a\\b"),
+            (r"a\%b", r"a\\\%b"),
+            ("%_\\", r"\%\_\\"),
+            ("", ""),
+        ],
+    )
+    def test_escapes_metacharacters(self, raw, expected):
+        assert _escape_like_pattern(raw) == expected
+
+
+class TestNonSearchFilterEscaping:
+    """``_OwnersFilter`` / ``_AssetDependencyFilter`` / 
``_ConsumingAssetFilter`` escape ``%`` and ``_``.
+
+    Compile-time check: the rendered SQL must wrap the *escaped* user value in 
``%...%`` and
+    declare an ``ESCAPE`` clause so the database treats user-supplied 
wildcards literally.
+    """
+
+    def test_owners_filter_escapes_user_wildcards(self):
+        param = _OwnersFilter().set_value(["100%_alice"])
+        statement = param.to_orm(select(DagModel))
+        sql = _compile(statement)
+        assert r"'%100\%\_alice%'" in sql
+        assert "escape" in sql
+
+    def test_asset_dependency_filter_escapes_user_wildcards(self):
+        param = _AssetDependencyFilter().set_value("ledger_%")
+        statement = param.to_orm(select(DagModel))
+        sql = _compile(statement)
+        assert r"'%ledger\_\%%'" in sql
+        assert "escape" in sql
+
+    def test_consuming_asset_filter_escapes_user_wildcards(self):
+        param = _ConsumingAssetFilter().set_value("foo_%bar")
+        statement = param.to_orm(select(DagRun))
+        sql = _compile(statement)
+        assert r"'%foo\_\%bar%'" in sql
+        assert "escape" in sql
+
+    def test_search_param_does_not_escape_user_wildcards(self):
+        """Counter-test: ``_SearchParam`` deliberately passes wildcards 
through."""
+        param = _SearchParam(DagModel.dag_id).set_value("foo_%bar")
+        statement = param.to_orm(select(DagModel))
+        sql = _compile(statement)
+        # Raw user wildcards are present, not the escaped form.
+        assert "'%foo_%bar%'" in sql
+
+
 class TestPrefixSearchParam:
     """Prefix search using range comparison (``attribute >= lower AND < 
upper``)."""
 

Reply via email to