This is an automated email from the ASF dual-hosted git repository.
vatsrahul1001 pushed a commit to branch v3-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v3-2-test by this push:
new 1ec52dff209 [v3-2-test] Redact rendered template fields while still
structured to preserve nested-key masking on truncation (#65906) (#67117)
1ec52dff209 is described below
commit 1ec52dff209eef8fddf37f9dd01daceebd5ec497
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue May 19 12:35:05 2026 +0530
[v3-2-test] Redact rendered template fields while still structured to
preserve nested-key masking on truncation (#65906) (#67117)
* Redact rendered template fields while still structured to preserve
nested-key masking on truncation
Generated-by: Claude Opus 4.7 (1M context) following the guidelines at
https://github.com/apache/airflow/blob/main/contributing-docs/05_pull_requests.rst#gen-ai-assisted-contributions
* Isolate masker patterns in nested-key truncation test
The new test_rendered_templates_mask_nested_keys_with_truncation shares
the singleton SecretsMasker with earlier tests in the file. One of those
(test_get_connection_from_context) fetches a connection whose password
fixture value happens to be the literal string "password", which the SDK
runtime registers as a regex mask via mask_secret(). When the new test
runs after it, that regex substitutes the literal token "password"
inside str(redacted) -- including the dict KEY name -- so the assertion
"'password': '***'" fails because the key itself is also masked.
Reset patterns/replacer for the test via monkeypatch (auto-restored on
teardown) so the assertion isolates value-masking (the behavior under
test) from key-token replacement (a side effect of leaked patterns).
(cherry picked from commit 4ceb0db321e2f716f326e3f1ccf82387992da121)
Co-authored-by: Jarek Potiuk <[email protected]>
Co-authored-by: Rahul Vats <[email protected]>
---
airflow-core/src/airflow/serialization/helpers.py | 5 +-
.../tests/unit/serialization/test_helpers.py | 24 ++++++++
.../src/airflow/sdk/execution_time/task_runner.py | 5 +-
.../task_sdk/execution_time/test_task_runner.py | 71 ++++++++++++++++++++++
4 files changed, 103 insertions(+), 2 deletions(-)
diff --git a/airflow-core/src/airflow/serialization/helpers.py
b/airflow-core/src/airflow/serialization/helpers.py
index 213b1991d6d..309f79299aa 100644
--- a/airflow-core/src/airflow/serialization/helpers.py
+++ b/airflow-core/src/airflow/serialization/helpers.py
@@ -106,7 +106,10 @@ def serialize_template_field(template_field: Any, name:
str) -> str | dict | lis
serialized = serialize_object(template_field)
if len(str(serialized)) > max_length:
- rendered = redact(str(serialized), name)
+ # Redact while still structured to preserve nested-key context (so
values under
+ # documented sensitive keys such as `password`, `token`, `secret`,
`api_key`
+ # are masked recursively); only stringify the redacted result for
truncation.
+ rendered = redact(serialized, name)
return truncate_rendered_value(str(rendered), max_length)
return serialized
diff --git a/airflow-core/tests/unit/serialization/test_helpers.py
b/airflow-core/tests/unit/serialization/test_helpers.py
index 0dbd70fd747..1e453a5e3d8 100644
--- a/airflow-core/tests/unit/serialization/test_helpers.py
+++ b/airflow-core/tests/unit/serialization/test_helpers.py
@@ -657,3 +657,27 @@ def
test_serialize_template_field_deeply_nested_dict_keys_recursively_normalized
assert all(isinstance(k, str) for k in inner[float_key])
assert "at 0x" not in str(r1)
json.dumps(r1, sort_keys=True)
+
+
+@pytest.mark.enable_redact
+def
test_serialize_template_field_masks_nested_sensitive_keys_on_truncation(monkeypatch):
+ """Nested sensitive-key masking applies consistently across the truncation
path.
+
+ A value under a documented sensitive key (``password``, ``token``,
``secret``,
+ ``api_key``) is masked recursively by ``redact()`` when the structured
value
+ is walked. The oversized branch must redact while still structured so that
+ nested-key context is preserved before stringification — otherwise the
post-
+ stringify ``redact()`` call only sees the outer field name and the
recursive
+ walker cannot reach the inner key.
+ """
+ monkeypatch.setenv("AIRFLOW__CORE__MAX_TEMPLATED_FIELD_LENGTH", "200")
+
+ nested_value = "REGRESSION-FIXTURE-NESTED-PASSWORD-VALUE"
+ payload = {"nested": {"password": nested_value, "zz_pad": "A" * 500}}
+
+ result = serialize_template_field(payload, "templates_dict")
+
+ assert isinstance(result, str)
+ assert "Truncated. You can change this behaviour" in result
+ assert nested_value not in result
+ assert "***" in result
diff --git a/task-sdk/src/airflow/sdk/execution_time/task_runner.py
b/task-sdk/src/airflow/sdk/execution_time/task_runner.py
index 60b6c5d8132..ae8705672e6 100644
--- a/task-sdk/src/airflow/sdk/execution_time/task_runner.py
+++ b/task-sdk/src/airflow/sdk/execution_time/task_runner.py
@@ -1047,7 +1047,10 @@ def _serialize_template_field(
serialized = serialize_object(template_field)
if len(str(serialized)) > max_length:
- rendered = redact(str(serialized), name)
+ # Redact while still structured to preserve nested-key context (so
values under
+ # documented sensitive keys such as `password`, `token`, `secret`,
`api_key`
+ # are masked recursively); only stringify the redacted result for
truncation.
+ rendered = redact(serialized, name)
return truncate_rendered_value(str(rendered), max_length)
return serialized
diff --git a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py
b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py
index 4c60830aa4a..354d9c3beee 100644
--- a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py
+++ b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py
@@ -2957,6 +2957,77 @@ class TestRuntimeTaskInstance:
assert env_vars_value.endswith("...")
assert "***" in env_vars_value # secrets are redacted before
truncation
+ @pytest.mark.enable_redact
+ def test_rendered_templates_mask_nested_keys_with_truncation(
+ self, create_runtime_ti, mock_supervisor_comms, monkeypatch
+ ):
+ """Nested sensitive-key masking applies consistently across the
truncation path.
+
+ A value under a documented sensitive key (``password``, ``token``,
``secret``,
+ ``api_key``) is masked recursively by ``redact()`` when the structured
value
+ is walked. The oversized branch must redact while still structured so
that
+ nested-key context is preserved before stringification — otherwise the
post-
+ stringify ``redact()`` call only sees the outer field name and the
recursive
+ walker cannot reach the inner key.
+ """
+ from airflow.sdk._shared.secrets_masker import _secrets_masker
+
+ # Earlier tests in this file (e.g. test_get_connection_from_context)
call
+ # mask_secret(conn.password) where the fixture's password value is the
literal
+ # "password"; that registers "password" as a regex pattern in the
singleton
+ # masker. Without isolation, str(redacted) gets that regex applied and
the
+ # dict KEY name "password" itself becomes "***", obscuring whether the
+ # structured nested-key walk fired. Reset the regex patterns for this
test
+ # (monkeypatch restores them on teardown) so the assertion can
distinguish
+ # value-masking (what we are testing) from key-token replacement.
+ masker = _secrets_masker()
+ monkeypatch.setattr(masker, "patterns", set())
+ monkeypatch.setattr(masker, "replacer", None)
+ # The SDK masker starts with an empty sensitive-fields list in the
test runtime
+ # (settings.py has not run); register `password` explicitly so the
structured
+ # walker has something to match. Production workers get this from
settings.py.
+ monkeypatch.setattr(
+ masker,
+ "sensitive_variables_fields",
+ list(masker.sensitive_variables_fields) + ["password"],
+ )
+
+ nested_value = "REGRESSION-FIXTURE-NESTED-PASSWORD-VALUE"
+
+ class CustomOperator(BaseOperator):
+ template_fields = ("env_vars",)
+
+ def __init__(self, env_vars, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.env_vars = env_vars
+
+ def execute(self, context):
+ pass
+
+ # Nested 'password' key under enough padding to exceed default
4096-char limit.
+ env_vars = {
+ "DB": {"password": nested_value, "host": "db.internal", "zz_pad":
"A" * 5000},
+ }
+
+ task = CustomOperator(task_id="test_nested_truncation_masking",
env_vars=env_vars)
+
+ runtime_ti = create_runtime_ti(task=task,
dag_id="test_nested_truncation_masking_dag")
+ run(runtime_ti, context=runtime_ti.get_template_context(),
log=mock.MagicMock())
+
+ msg = next(
+ c.kwargs["msg"]
+ for c in mock_supervisor_comms.send.mock_calls
+ if c.kwargs.get("msg") and getattr(c.kwargs["msg"], "type", None)
== "SetRenderedFields"
+ )
+ env_vars_value = msg.rendered_fields["env_vars"]
+
+ assert isinstance(env_vars_value, str)
+ assert env_vars_value.startswith(
+ "Truncated. You can change this behaviour in
[core]max_templated_field_length. "
+ )
+ assert nested_value not in env_vars_value
+ assert "'password': '***'" in env_vars_value
+
@pytest.mark.enable_redact
def test_rendered_templates_masks_secrets_in_complex_objects(
self, create_runtime_ti, mock_supervisor_comms