This is an automated email from the ASF dual-hosted git repository.

ephraimanierobi pushed a commit to branch v2-8-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 673f7f837fa00471008ebae88b192f7cec729d55
Author: Usiel Riedl <usiel.ri...@automattic.com>
AuthorDate: Wed Nov 22 18:19:04 2023 +0800

    Fix for infinite recursion due to secrets_masker (#35048)
    
    * Fix for infinite recursion due to secrets_masker
    
    We can get into trouble for types that cannot be initiated with re2's 
`type(obj)()` call. The `secrets_masker` thus fails, which triggers a warning 
log, which also fails because we pass the object to the logger, which is then 
masked again, and so forth.
    
    We can break the recursion by emitting a log without trying to redact the 
value again (this ensures no new bug will cause a stack overflow). This issue 
has occured previously: 
https://github.com/apache/airflow/issues/19816#issuecomment-983311373
    Additionally, we fix this particular bug by ensuring whatever re2 receives 
is a simple `str`.
    
    I noticed this issue while working with a DAG that calls Airflow's DB 
cleanup function.
    
    Example DAG:
    ```
    from datetime import datetime
    
    from airflow import DAG
    from airflow.models import Variable
    from airflow.operators.python import PythonOperator
    
    class MyStringClass(str):
        def __init__(self, required_arg):
            pass
    
    def fail(task_instance):
        # make sure the `SecretsMasker` has a replacer
        Variable.set(key="secret", value="secret_value")
        Variable.get("secret")
        # trigger the infinite recursion
        task_instance.log.info("%s", MyStringClass("secret_value"))
    
    with DAG(
        dag_id="secrets_masker_recursion",
        start_date=datetime(2023, 9, 26),
    ):
        PythonOperator(task_id="fail", python_callable=fail)
    
    ```
    
    * Improve error message
    
    ---------
    
    Co-authored-by: Tzu-ping Chung <uranu...@gmail.com>
---
 airflow/utils/log/secrets_masker.py    |  9 +++++----
 tests/utils/log/test_secrets_masker.py | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/airflow/utils/log/secrets_masker.py 
b/airflow/utils/log/secrets_masker.py
index 246377c169..0b1b65f840 100644
--- a/airflow/utils/log/secrets_masker.py
+++ b/airflow/utils/log/secrets_masker.py
@@ -261,7 +261,7 @@ class SecretsMasker(logging.Filter):
                     # We can't replace specific values, but the key-based 
redacting
                     # can still happen, so we can't short-circuit, we need to 
walk
                     # the structure.
-                    return self.replacer.sub("***", item)
+                    return self.replacer.sub("***", str(item))
                 return item
             elif isinstance(item, (tuple, set)):
                 # Turn set in to tuple!
@@ -276,14 +276,15 @@ class SecretsMasker(logging.Filter):
                 return item
         # I think this should never happen, but it does not hurt to leave it 
just in case
         # Well. It happened (see 
https://github.com/apache/airflow/issues/19816#issuecomment-983311373)
-        # but it caused infinite recursion, so we need to cast it to str first.
+        # but it caused infinite recursion, to avoid this we mark the log as 
already filtered.
         except Exception as exc:
             log.warning(
-                "Unable to redact %r, please report this via 
<https://github.com/apache/airflow/issues>. "
-                "Error was: %s: %s",
+                "Unable to redact value of type %s, please report this via "
+                "<https://github.com/apache/airflow/issues>. Error was: %s: 
%s",
                 item,
                 type(exc).__name__,
                 exc,
+                extra={self.ALREADY_FILTERED_FLAG: True},
             )
             return item
 
diff --git a/tests/utils/log/test_secrets_masker.py 
b/tests/utils/log/test_secrets_masker.py
index ffaf2977ae..4657a7c1f3 100644
--- a/tests/utils/log/test_secrets_masker.py
+++ b/tests/utils/log/test_secrets_masker.py
@@ -305,6 +305,24 @@ class TestSecretsMasker:
             got = redact(val, max_depth=max_depth)
             assert got == expected
 
+    def test_redact_with_str_type(self, logger, caplog):
+        """
+        SecretsMasker's re2 replacer has issues handling a redactable item of 
type
+        `str` with required constructor args. This test ensures there is a 
shim in
+        place that avoids any issues.
+        See: 
https://github.com/apache/airflow/issues/19816#issuecomment-983311373
+        """
+
+        class StrLikeClassWithRequiredConstructorArg(str):
+            def __init__(self, required_arg):
+                pass
+
+        text = StrLikeClassWithRequiredConstructorArg("password")
+        logger.info("redacted: %s", text)
+
+        # we expect the object's __str__() output to be logged (no warnings 
due to a failed masking)
+        assert caplog.messages == ["redacted: ***"]
+
     @pytest.mark.parametrize(
         "state, expected",
         [

Reply via email to