This is an automated email from the ASF dual-hosted git repository.
vatsrahul1001 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 80f1ab4d5a0 Tighten deserialization allowlist regex to require full-string match (#66499)
80f1ab4d5a0 is described below
commit 80f1ab4d5a0f8b83873ce31f32b89d341f667b6f
Author: Jarek Potiuk <[email protected]>
AuthorDate: Mon May 18 11:12:57 2026 +0200
Tighten deserialization allowlist regex to require full-string match (#66499)
* Tighten deserialization allowlist regex to use full-string match
The ``allowed_deserialization_classes_regexp`` allowlist used ``re.match()``,
which only anchors at the start of the string. A pattern like
``airflow\.models\.Variable`` therefore also admitted classnames such as
``airflow.models.Variable_Malicious``. Switch to ``re.fullmatch()`` so the
admin's pattern must match the entire classname; document the semantics in
the config description so operators know to append ``.*`` for prefix-style
allowances.
* Add newsfragment for #66499
---------
Co-authored-by: Rahul Vats <[email protected]>
---
airflow-core/newsfragments/66499.significant.rst | 14 ++++++++++++++
airflow-core/src/airflow/config_templates/config.yml | 5 ++++-
task-sdk/src/airflow/sdk/serde/__init__.py | 4 +++-
task-sdk/tests/task_sdk/serde/test_serde.py | 20 +++++++++++++++++++-
4 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/airflow-core/newsfragments/66499.significant.rst
b/airflow-core/newsfragments/66499.significant.rst
new file mode 100644
index 00000000000..5b321588876
--- /dev/null
+++ b/airflow-core/newsfragments/66499.significant.rst
@@ -0,0 +1,14 @@
+Tighten ``[core] allowed_deserialization_classes_regexp`` to require full-string matches
+
+Patterns in ``[core] allowed_deserialization_classes_regexp`` are now matched
+against the entire classname using ``re.fullmatch()`` instead of ``re.match()``.
+Previously a pattern such as ``airflow\.models\.Variable`` admitted not only
+the intended class but also names that started with it
+(e.g. ``airflow.models.Variable_Malicious``), because ``re.match`` only anchors
+at the start of the string.
+
+The default value of this option is empty, so out-of-the-box deployments are
+unaffected. Deployments that configured this option with patterns relying on
+prefix-match semantics — for example ``airflow\.models\.`` to mean "any class
+under ``airflow.models``" — must add ``.*`` to the pattern
+(``airflow\.models\..*``) to retain the previous behaviour.
diff --git a/airflow-core/src/airflow/config_templates/config.yml
b/airflow-core/src/airflow/config_templates/config.yml
index 03593ce4ba0..57ddd14a6cb 100644
--- a/airflow-core/src/airflow/config_templates/config.yml
+++ b/airflow-core/src/airflow/config_templates/config.yml
@@ -261,7 +261,10 @@ core:
allowed_deserialization_classes_regexp:
description: |
Space-separated list of classes that may be imported during deserialization. Items are processed
- as regex expressions. Python built-in classes (like dict) are always allowed.
+ as regex expressions and matched against the full classname (``re.fullmatch`` semantics), so a
+ pattern such as ``airflow\.models\.Variable`` does not also admit ``airflow.models.VariableXYZ``.
+ Use ``.*`` (e.g. ``airflow\.models\..*``) to allow a prefix and any suffix. Python built-in
+ classes (like dict) are always allowed.
This is a secondary option to ``[core] allowed_deserialization_classes``.
version_added: 2.8.2
type: string
diff --git a/task-sdk/src/airflow/sdk/serde/__init__.py
b/task-sdk/src/airflow/sdk/serde/__init__.py
index d25da4b79b6..7e96e73a604 100644
--- a/task-sdk/src/airflow/sdk/serde/__init__.py
+++ b/task-sdk/src/airflow/sdk/serde/__init__.py
@@ -332,8 +332,10 @@ def _match_glob(classname: str):
@functools.cache
def _match_regexp(classname: str):
"""Check if the given classname matches a pattern from allowed_deserialization_classes_regexp using regexp."""
+ # fullmatch (not match) so a pattern like ``airflow\.models\.Variable`` cannot also admit
+ # ``airflow.models.Variable_Malicious`` — re.match only anchors at the start of the string.
patterns = _get_regexp_patterns()
- return any(p.match(classname) is not None for p in patterns)
+ return any(p.fullmatch(classname) is not None for p in patterns)
def _stringify(classname: str, version: int, value: T | None) -> str:
diff --git a/task-sdk/tests/task_sdk/serde/test_serde.py
b/task-sdk/tests/task_sdk/serde/test_serde.py
index f264164ffca..17f71783cb6 100644
--- a/task-sdk/tests/task_sdk/serde/test_serde.py
+++ b/task-sdk/tests/task_sdk/serde/test_serde.py
@@ -367,7 +367,7 @@ class TestSerDe:
@conf_vars(
{
("core", "allowed_deserialization_classes"): "",
- ("core", "allowed_deserialization_classes_regexp"):
r"unit\.airflow\..",
+ ("core", "allowed_deserialization_classes_regexp"):
r"unit\.airflow\..*",
}
)
@pytest.mark.usefixtures("recalculate_patterns")
@@ -394,6 +394,24 @@ class TestSerDe:
assert _match("unit.airflow.deep")
assert _match("unit.airflow.FALSE") is False
+ @conf_vars(
+ {
+ ("core", "allowed_deserialization_classes"): "",
+ ("core", "allowed_deserialization_classes_regexp"):
r"unit\.airflow\.Variable",
+ }
+ )
+ @pytest.mark.usefixtures("recalculate_patterns")
+ def test_allow_list_regexp_does_not_prefix_match(self):
+ """
+ A pattern without an explicit end anchor must not admit classes that share
+ the pattern as a prefix. ``re.match`` would let ``unit.airflow.Variable_Malicious``
+ through because it only anchors at the start of the string; ``re.fullmatch``
+ rejects it. Patterns with ``.*`` at the end retain prefix-style behaviour.
+ """
+ assert _match("unit.airflow.Variable")
+ assert _match("unit.airflow.Variable_Malicious") is False
+ assert _match("unit.airflow.VariableSubclass") is False
+
def test_incompatible_version(self):
data = dict(
{