This is an automated email from the ASF dual-hosted git repository.

vatsrahul1001 pushed a commit to branch v3-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v3-2-test by this push:
     new 1ec52dff209 [v3-2-test] Redact rendered template fields while still 
structured to preserve nested-key masking on truncation (#65906) (#67117)
1ec52dff209 is described below

commit 1ec52dff209eef8fddf37f9dd01daceebd5ec497
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue May 19 12:35:05 2026 +0530

    [v3-2-test] Redact rendered template fields while still structured to 
preserve nested-key masking on truncation (#65906) (#67117)
    
    * Redact rendered template fields while still structured to preserve 
nested-key masking on truncation
    
    Generated-by: Claude Opus 4.7 (1M context) following the guidelines at 
https://github.com/apache/airflow/blob/main/contributing-docs/05_pull_requests.rst#gen-ai-assisted-contributions
    
    * Isolate masker patterns in nested-key truncation test
    
    The new test_rendered_templates_mask_nested_keys_with_truncation shares
    the singleton SecretsMasker with earlier tests in the file. One of those
    (test_get_connection_from_context) fetches a connection whose password
    fixture value happens to be the literal string "password", which the SDK
    runtime registers as a regex mask via mask_secret(). When the new test
    runs after it, that regex substitutes the literal token "password"
    inside str(redacted) -- including the dict KEY name -- so the assertion
    "'password': '***'" fails because the key itself is also masked.
    
    Reset patterns/replacer for the test via monkeypatch (auto-restored on
    teardown) so the assertion isolates value-masking (the behavior under
    test) from key-token replacement (a side effect of leaked patterns).
    (cherry picked from commit 4ceb0db321e2f716f326e3f1ccf82387992da121)
    
    Co-authored-by: Jarek Potiuk <[email protected]>
    Co-authored-by: Rahul Vats <[email protected]>
---
 airflow-core/src/airflow/serialization/helpers.py  |  5 +-
 .../tests/unit/serialization/test_helpers.py       | 24 ++++++++
 .../src/airflow/sdk/execution_time/task_runner.py  |  5 +-
 .../task_sdk/execution_time/test_task_runner.py    | 71 ++++++++++++++++++++++
 4 files changed, 103 insertions(+), 2 deletions(-)

diff --git a/airflow-core/src/airflow/serialization/helpers.py 
b/airflow-core/src/airflow/serialization/helpers.py
index 213b1991d6d..309f79299aa 100644
--- a/airflow-core/src/airflow/serialization/helpers.py
+++ b/airflow-core/src/airflow/serialization/helpers.py
@@ -106,7 +106,10 @@ def serialize_template_field(template_field: Any, name: 
str) -> str | dict | lis
     serialized = serialize_object(template_field)
 
     if len(str(serialized)) > max_length:
-        rendered = redact(str(serialized), name)
+        # Redact while still structured to preserve nested-key context (so values under
+        # documented sensitive keys such as `password`, `token`, `secret`, `api_key`
+        # are masked recursively); only stringify the redacted result for truncation.
+        rendered = redact(serialized, name)
         return truncate_rendered_value(str(rendered), max_length)
 
     return serialized
diff --git a/airflow-core/tests/unit/serialization/test_helpers.py 
b/airflow-core/tests/unit/serialization/test_helpers.py
index 0dbd70fd747..1e453a5e3d8 100644
--- a/airflow-core/tests/unit/serialization/test_helpers.py
+++ b/airflow-core/tests/unit/serialization/test_helpers.py
@@ -657,3 +657,27 @@ def 
test_serialize_template_field_deeply_nested_dict_keys_recursively_normalized
     assert all(isinstance(k, str) for k in inner[float_key])
     assert "at 0x" not in str(r1)
     json.dumps(r1, sort_keys=True)
+
+
+@pytest.mark.enable_redact
+def test_serialize_template_field_masks_nested_sensitive_keys_on_truncation(monkeypatch):
+    """Nested sensitive-key masking applies consistently across the truncation 
path.
+
+    A value under a documented sensitive key (``password``, ``token``, 
``secret``,
+    ``api_key``) is masked recursively by ``redact()`` when the structured 
value
+    is walked. The oversized branch must redact while still structured so that
+    nested-key context is preserved before stringification — otherwise the 
post-
+    stringify ``redact()`` call only sees the outer field name and the 
recursive
+    walker cannot reach the inner key.
+    """
+    monkeypatch.setenv("AIRFLOW__CORE__MAX_TEMPLATED_FIELD_LENGTH", "200")
+
+    nested_value = "REGRESSION-FIXTURE-NESTED-PASSWORD-VALUE"
+    payload = {"nested": {"password": nested_value, "zz_pad": "A" * 500}}
+
+    result = serialize_template_field(payload, "templates_dict")
+
+    assert isinstance(result, str)
+    assert "Truncated. You can change this behaviour" in result
+    assert nested_value not in result
+    assert "***" in result
diff --git a/task-sdk/src/airflow/sdk/execution_time/task_runner.py 
b/task-sdk/src/airflow/sdk/execution_time/task_runner.py
index 60b6c5d8132..ae8705672e6 100644
--- a/task-sdk/src/airflow/sdk/execution_time/task_runner.py
+++ b/task-sdk/src/airflow/sdk/execution_time/task_runner.py
@@ -1047,7 +1047,10 @@ def _serialize_template_field(
     serialized = serialize_object(template_field)
 
     if len(str(serialized)) > max_length:
-        rendered = redact(str(serialized), name)
+        # Redact while still structured to preserve nested-key context (so values under
+        # documented sensitive keys such as `password`, `token`, `secret`, `api_key`
+        # are masked recursively); only stringify the redacted result for truncation.
+        rendered = redact(serialized, name)
         return truncate_rendered_value(str(rendered), max_length)
 
     return serialized
diff --git a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py 
b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py
index 4c60830aa4a..354d9c3beee 100644
--- a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py
+++ b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py
@@ -2957,6 +2957,77 @@ class TestRuntimeTaskInstance:
         assert env_vars_value.endswith("...")
         assert "***" in env_vars_value  # secrets are redacted before 
truncation
 
+    @pytest.mark.enable_redact
+    def test_rendered_templates_mask_nested_keys_with_truncation(
+        self, create_runtime_ti, mock_supervisor_comms, monkeypatch
+    ):
+        """Nested sensitive-key masking applies consistently across the 
truncation path.
+
+        A value under a documented sensitive key (``password``, ``token``, 
``secret``,
+        ``api_key``) is masked recursively by ``redact()`` when the structured 
value
+        is walked. The oversized branch must redact while still structured so 
that
+        nested-key context is preserved before stringification — otherwise the 
post-
+        stringify ``redact()`` call only sees the outer field name and the 
recursive
+        walker cannot reach the inner key.
+        """
+        from airflow.sdk._shared.secrets_masker import _secrets_masker
+
+        # Earlier tests in this file (e.g. test_get_connection_from_context) 
call
+        # mask_secret(conn.password) where the fixture's password value is the 
literal
+        # "password"; that registers "password" as a regex pattern in the 
singleton
+        # masker. Without isolation, str(redacted) gets that regex applied and 
the
+        # dict KEY name "password" itself becomes "***", obscuring whether the
+        # structured nested-key walk fired. Reset the regex patterns for this 
test
+        # (monkeypatch restores them on teardown) so the assertion can 
distinguish
+        # value-masking (what we are testing) from key-token replacement.
+        masker = _secrets_masker()
+        monkeypatch.setattr(masker, "patterns", set())
+        monkeypatch.setattr(masker, "replacer", None)
+        # The SDK masker starts with an empty sensitive-fields list in the 
test runtime
+        # (settings.py has not run); register `password` explicitly so the 
structured
+        # walker has something to match. Production workers get this from 
settings.py.
+        monkeypatch.setattr(
+            masker,
+            "sensitive_variables_fields",
+            list(masker.sensitive_variables_fields) + ["password"],
+        )
+
+        nested_value = "REGRESSION-FIXTURE-NESTED-PASSWORD-VALUE"
+
+        class CustomOperator(BaseOperator):
+            template_fields = ("env_vars",)
+
+            def __init__(self, env_vars, *args, **kwargs):
+                super().__init__(*args, **kwargs)
+                self.env_vars = env_vars
+
+            def execute(self, context):
+                pass
+
+        # Nested 'password' key under enough padding to exceed default 
4096-char limit.
+        env_vars = {
+            "DB": {"password": nested_value, "host": "db.internal", "zz_pad": 
"A" * 5000},
+        }
+
+        task = CustomOperator(task_id="test_nested_truncation_masking", 
env_vars=env_vars)
+
+        runtime_ti = create_runtime_ti(task=task, 
dag_id="test_nested_truncation_masking_dag")
+        run(runtime_ti, context=runtime_ti.get_template_context(), 
log=mock.MagicMock())
+
+        msg = next(
+            c.kwargs["msg"]
+            for c in mock_supervisor_comms.send.mock_calls
+            if c.kwargs.get("msg") and getattr(c.kwargs["msg"], "type", None) 
== "SetRenderedFields"
+        )
+        env_vars_value = msg.rendered_fields["env_vars"]
+
+        assert isinstance(env_vars_value, str)
+        assert env_vars_value.startswith(
+            "Truncated. You can change this behaviour in [core]max_templated_field_length. "
+        )
+        assert nested_value not in env_vars_value
+        assert "'password': '***'" in env_vars_value
+
     @pytest.mark.enable_redact
     def test_rendered_templates_masks_secrets_in_complex_objects(
         self, create_runtime_ti, mock_supervisor_comms

Reply via email to