This is an automated email from the ASF dual-hosted git repository.

jscheffl pushed a commit to branch v3-1-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v3-1-test by this push:
     new 91ea42a3c2a [v3-1-test] Fix structlog JSON serialization crash on non-serializable objects (#62656) (#63721)
91ea42a3c2a is described below

commit 91ea42a3c2a33271d4ac430bf0459bccad11ddb0
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Mar 16 21:02:07 2026 +0100

    [v3-1-test] Fix structlog JSON serialization crash on non-serializable objects (#62656) (#63721)
    
    * Fix structlog JSON serialization crash on non-serializable objects
    
    Wrap the enc_hook in json_dumps() with a safe_default() fallback that
    catches TypeError and falls back to str(). This prevents the logging
    pipeline from crashing when log event dicts contain objects that
    msgspec cannot serialize.
    
    Closes: #62472
    Closes: #62201
    
    
    
    * Address review: hoist safe_default into factory, add tests
    
    - Move the inline safe_default closure out of json_dumps into a
      module-level _make_safe_enc_hook factory function to avoid
      recreating it on every JSON log line.
    - Add test for the actual crash scenario: an object whose
      __structlog__() raises TypeError now falls back to str() instead
      of crashing the logger.
    - Add regression test confirming normal custom objects still
      serialize via repr() through the standard enc_hook path.
    - Add unit test for the default=None edge case in _make_safe_enc_hook.
    
    * Handle UnicodeEncodeError in structlog JSON serialization
    
    Broaden exception handling to catch ValueError (including
    UnicodeEncodeError) in the enc_hook, and add a fallback around
    msgspec.json.encode() to replace surrogate characters that can't
    be encoded to UTF-8 JSON.
    
    ---------
    (cherry picked from commit 0b43077a26e5c8dc2447e89980152b7964965aa7)
    
    Co-authored-by: deepinsight coder <[email protected]>
    Co-authored-by: Claude Opus 4.6 <[email protected]>
---
 .../src/airflow_shared/logging/structlog.py        | 26 ++++++++-
 shared/logging/tests/logging/test_structlog.py     | 67 ++++++++++++++++++++++
 2 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/shared/logging/src/airflow_shared/logging/structlog.py b/shared/logging/src/airflow_shared/logging/structlog.py
index 9e36d285baf..6b89d101d02 100644
--- a/shared/logging/src/airflow_shared/logging/structlog.py
+++ b/shared/logging/src/airflow_shared/logging/structlog.py
@@ -221,6 +221,20 @@ def respect_stdlib_disable(logger: Any, method_name: Any, event_dict: EventDict)
     return event_dict
 
 
+def _make_safe_enc_hook(default):
+    """Wrap an enc_hook so that serialization failures fall back to ``str()``."""
+
+    def safe_enc_hook(obj):
+        if default is not None:
+            try:
+                return default(obj)
+            except (TypeError, ValueError):
+                pass
+        return str(obj)
+
+    return safe_enc_hook
+
+
 @cache
 def structlog_processors(
     json_output: bool,
@@ -313,7 +327,17 @@ def structlog_processors(
                 "event": msg.pop("event"),
                 **msg,
             }
-            return msgspec.json.encode(msg, enc_hook=default)
+
+            try:
+                return msgspec.json.encode(msg, enc_hook=_make_safe_enc_hook(default))
+            except UnicodeEncodeError:
+                # Surrogate characters in strings can't be encoded to UTF-8 JSON.
+                # Replace them and retry.
+                def _sanitize(v):
+                    return v.encode("utf-8", errors="replace").decode("utf-8") if isinstance(v, str) else v
+
+                msg = {k: _sanitize(v) for k, v in msg.items()}
+                return msgspec.json.encode(msg, enc_hook=_make_safe_enc_hook(default))
 
         json = structlog.processors.JSONRenderer(serializer=json_dumps)
 
diff --git a/shared/logging/tests/logging/test_structlog.py b/shared/logging/tests/logging/test_structlog.py
index 4755506190e..7627c1eecea 100644
--- a/shared/logging/tests/logging/test_structlog.py
+++ b/shared/logging/tests/logging/test_structlog.py
@@ -254,6 +254,73 @@ def test_json(structlog_config, get_logger, config_kwargs, log_kwargs, expected_
     }
 
 
+def test_json_non_serializable_object(structlog_config):
+    """Non-serializable objects in log context fall back to str() instead of crashing."""
+
+    class BadStructlog:
+        def __structlog__(self):
+            raise TypeError("unsupported")
+
+        def __str__(self):
+            return "<BadStructlog>"
+
+    with structlog_config(json_output=True) as bio:
+        logger = structlog.get_logger("my.logger")
+        logger.info("Hello", obj=BadStructlog())
+
+    written = json.load(bio)
+    assert written["obj"] == "<BadStructlog>"
+    assert written["event"] == "Hello"
+
+
+def test_json_custom_object_uses_repr(structlog_config):
+    """Custom objects without __structlog__ serialize via repr() through the normal enc_hook path."""
+
+    class CustomObj:
+        pass
+
+    with structlog_config(json_output=True) as bio:
+        logger = structlog.get_logger("my.logger")
+        logger.info("Hello", obj=CustomObj())
+
+    written = json.load(bio)
+    assert written["event"] == "Hello"
+    assert "CustomObj" in written["obj"]
+
+
+def test_safe_enc_hook_with_none_default():
+    """When default is None, _make_safe_enc_hook falls back to str() directly."""
+    from airflow_shared.logging.structlog import _make_safe_enc_hook
+
+    hook = _make_safe_enc_hook(None)
+    assert hook(42) == "42"
+    assert hook(object()).startswith("<object object at")
+
+
+def test_safe_enc_hook_catches_value_error():
+    """ValueError (including UnicodeEncodeError) from enc_hook falls back to str()."""
+    from airflow_shared.logging.structlog import _make_safe_enc_hook
+
+    def bad_default(obj):
+        raise ValueError("surrogates not allowed")
+
+    hook = _make_safe_enc_hook(bad_default)
+    assert hook(42) == "42"
+
+
+def test_json_unicode_surrogate_in_value(structlog_config):
+    """Surrogate characters in log values don't crash JSON serialization."""
+    with structlog_config(json_output=True) as bio:
+        logger = structlog.get_logger("my.logger")
+        logger.info("Hello", text="before \udce2 after")
+
+    written = json.load(bio)
+    assert written["event"] == "Hello"
+    # Surrogates are replaced with the Unicode replacement character
+    assert "\udce2" not in written["text"]
+    assert "before" in written["text"]
+
+
 @pytest.mark.parametrize(
     ("get_logger"),
     [

Reply via email to