This is an automated email from the ASF dual-hosted git repository.

vatsrahul1001 pushed a commit to branch v3-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v3-2-test by this push:
     new 5e2fcf36f57 [v3-2-test] Tighten deserialization allowlist regex to require full-string match (#66499) (#67096)
5e2fcf36f57 is described below

commit 5e2fcf36f57e1a101f25099b6d034051483b40e3
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon May 18 17:27:14 2026 +0530

    [v3-2-test] Tighten deserialization allowlist regex to require full-string match (#66499) (#67096)
    
    * Tighten deserialization allowlist regex to use full-string match
    
    The ``allowed_deserialization_classes_regexp`` allowlist used ``re.match()``,
    which only anchors at the start of the string. A pattern like
    ``airflow\.models\.Variable`` therefore also admitted classnames such as
    ``airflow.models.Variable_Malicious``. Switch to ``re.fullmatch()`` so the
    admin's pattern matches the entire classname; document the semantics in
    the config description so operators know to use ``.*`` for prefix-style
    allowances.
    
    * Add newsfragment for #66499
    
    ---------
    (cherry picked from commit 80f1ab4d5a0f8b83873ce31f32b89d341f667b6f)
    
    Co-authored-by: Jarek Potiuk <[email protected]>
    Co-authored-by: Rahul Vats <[email protected]>
---
 airflow-core/newsfragments/66499.significant.rst     | 14 ++++++++++++++
 airflow-core/src/airflow/config_templates/config.yml |  5 ++++-
 task-sdk/src/airflow/sdk/serde/__init__.py           |  4 +++-
 task-sdk/tests/task_sdk/serde/test_serde.py          | 20 +++++++++++++++++++-
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/airflow-core/newsfragments/66499.significant.rst b/airflow-core/newsfragments/66499.significant.rst
new file mode 100644
index 00000000000..5b321588876
--- /dev/null
+++ b/airflow-core/newsfragments/66499.significant.rst
@@ -0,0 +1,14 @@
+Tighten ``[core] allowed_deserialization_classes_regexp`` to require full-string matches
+
+Patterns in ``[core] allowed_deserialization_classes_regexp`` are now matched
+against the entire classname using ``re.fullmatch()`` instead of ``re.match()``.
+Previously a pattern such as ``airflow\.models\.Variable`` admitted not only
+the intended class but also names that started with it
+(e.g. ``airflow.models.Variable_Malicious``), because ``re.match`` only anchors
+at the start of the string.
+
+The default value of this option is empty, so out-of-the-box deployments are
+unaffected. Deployments that configured this option with patterns relying on
+prefix-match semantics — for example ``airflow\.models\.`` to mean "any class
+under ``airflow.models``" — must add ``.*`` to the pattern
+(``airflow\.models\..*``) to retain the previous behaviour.
diff --git a/airflow-core/src/airflow/config_templates/config.yml b/airflow-core/src/airflow/config_templates/config.yml
index bbb137880ba..ce5caf06437 100644
--- a/airflow-core/src/airflow/config_templates/config.yml
+++ b/airflow-core/src/airflow/config_templates/config.yml
@@ -261,7 +261,10 @@ core:
     allowed_deserialization_classes_regexp:
       description: |
         Space-separated list of classes that may be imported during deserialization. Items are processed
-        as regex expressions. Python built-in classes (like dict) are always allowed.
+        as regex expressions and matched against the full classname (``re.fullmatch`` semantics), so a
+        pattern such as ``airflow\.models\.Variable`` does not also admit ``airflow.models.VariableXYZ``.
+        Use ``.*`` (e.g. ``airflow\.models\..*``) to allow a prefix and any suffix. Python built-in
+        classes (like dict) are always allowed.
         This is a secondary option to ``[core] allowed_deserialization_classes``.
       version_added: 2.8.2
       type: string
diff --git a/task-sdk/src/airflow/sdk/serde/__init__.py b/task-sdk/src/airflow/sdk/serde/__init__.py
index c22b6b7caea..d0eafbe62a2 100644
--- a/task-sdk/src/airflow/sdk/serde/__init__.py
+++ b/task-sdk/src/airflow/sdk/serde/__init__.py
@@ -332,8 +332,10 @@ def _match_glob(classname: str):
 @functools.cache
 def _match_regexp(classname: str):
     """Check if the given classname matches a pattern from allowed_deserialization_classes_regexp using regexp."""
+    # fullmatch (not match) so a pattern like ``airflow\.models\.Variable`` cannot also admit
+    # ``airflow.models.Variable_Malicious`` — re.match only anchors at the start of the string.
     patterns = _get_regexp_patterns()
-    return any(p.match(classname) is not None for p in patterns)
+    return any(p.fullmatch(classname) is not None for p in patterns)
 
 
 def _stringify(classname: str, version: int, value: T | None) -> str:
diff --git a/task-sdk/tests/task_sdk/serde/test_serde.py b/task-sdk/tests/task_sdk/serde/test_serde.py
index f264164ffca..17f71783cb6 100644
--- a/task-sdk/tests/task_sdk/serde/test_serde.py
+++ b/task-sdk/tests/task_sdk/serde/test_serde.py
@@ -367,7 +367,7 @@ class TestSerDe:
     @conf_vars(
         {
             ("core", "allowed_deserialization_classes"): "",
-            ("core", "allowed_deserialization_classes_regexp"): r"unit\.airflow\..",
+            ("core", "allowed_deserialization_classes_regexp"): r"unit\.airflow\..*",
         }
     )
     @pytest.mark.usefixtures("recalculate_patterns")
@@ -394,6 +394,24 @@ class TestSerDe:
         assert _match("unit.airflow.deep")
         assert _match("unit.airflow.FALSE") is False
 
+    @conf_vars(
+        {
+            ("core", "allowed_deserialization_classes"): "",
+            ("core", "allowed_deserialization_classes_regexp"): r"unit\.airflow\.Variable",
+        }
+    )
+    @pytest.mark.usefixtures("recalculate_patterns")
+    def test_allow_list_regexp_does_not_prefix_match(self):
+        """
+        A pattern without an explicit end anchor must not admit classes that share
+        the pattern as a prefix. ``re.match`` would let ``unit.airflow.Variable_Malicious``
+        through because it only anchors at the start of the string; ``re.fullmatch``
+        rejects it. Patterns with ``.*`` at the end retain prefix-style behaviour.
+        """
+        assert _match("unit.airflow.Variable")
+        assert _match("unit.airflow.Variable_Malicious") is False
+        assert _match("unit.airflow.VariableSubclass") is False
+
     def test_incompatible_version(self):
         data = dict(
             {

Reply via email to