This is an automated email from the ASF dual-hosted git repository.
jscheffl pushed a commit to branch v3-1-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v3-1-test by this push:
new 91ea42a3c2a [v3-1-test] Fix structlog JSON serialization crash on
non-serializable objects (#62656) (#63721)
91ea42a3c2a is described below
commit 91ea42a3c2a33271d4ac430bf0459bccad11ddb0
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Mar 16 21:02:07 2026 +0100
[v3-1-test] Fix structlog JSON serialization crash on non-serializable
objects (#62656) (#63721)
* Fix structlog JSON serialization crash on non-serializable objects
Wrap the enc_hook in json_dumps() with a safe_default() fallback that
catches TypeError and falls back to str(). This prevents the logging
pipeline from crashing when log event dicts contain objects that
msgspec cannot serialize.
Closes: #62472
Closes: #62201
* Address review: hoist safe_default into factory, add tests
- Move the inline safe_default closure out of json_dumps into a
module-level _make_safe_enc_hook factory function to avoid
recreating it on every JSON log line.
- Add test for the actual crash scenario: an object whose
__structlog__() raises TypeError now falls back to str() instead
of crashing the logger.
- Add regression test confirming normal custom objects still
serialize via repr() through the standard enc_hook path.
- Add unit test for the default=None edge case in _make_safe_enc_hook.
* Handle UnicodeEncodeError in structlog JSON serialization
Broaden exception handling to catch ValueError (including
UnicodeEncodeError) in the enc_hook, and add a fallback around
msgspec.json.encode() to replace surrogate characters that can't
be encoded to UTF-8 JSON.
---------
(cherry picked from commit 0b43077a26e5c8dc2447e89980152b7964965aa7)
Co-authored-by: deepinsight coder
<[email protected]>
Co-authored-by: Claude Opus 4.6 <[email protected]>
---
.../src/airflow_shared/logging/structlog.py | 26 ++++++++-
shared/logging/tests/logging/test_structlog.py | 67 ++++++++++++++++++++++
2 files changed, 92 insertions(+), 1 deletion(-)
diff --git a/shared/logging/src/airflow_shared/logging/structlog.py b/shared/logging/src/airflow_shared/logging/structlog.py
index 9e36d285baf..6b89d101d02 100644
--- a/shared/logging/src/airflow_shared/logging/structlog.py
+++ b/shared/logging/src/airflow_shared/logging/structlog.py
@@ -221,6 +221,20 @@ def respect_stdlib_disable(logger: Any, method_name: Any, event_dict: EventDict)
return event_dict
+def _make_safe_enc_hook(default):
+ """Wrap an enc_hook so that serialization failures fall back to ``str()``."""
+
+ def safe_enc_hook(obj):
+ if default is not None:
+ try:
+ return default(obj)
+ except (TypeError, ValueError):
+ pass
+ return str(obj)
+
+ return safe_enc_hook
+
+
@cache
def structlog_processors(
json_output: bool,
@@ -313,7 +327,17 @@ def structlog_processors(
"event": msg.pop("event"),
**msg,
}
- return msgspec.json.encode(msg, enc_hook=default)
+
+ try:
+ return msgspec.json.encode(msg, enc_hook=_make_safe_enc_hook(default))
+ except UnicodeEncodeError:
+ # Surrogate characters in strings can't be encoded to UTF-8 JSON.
+ # Replace them and retry.
+ def _sanitize(v):
+ return v.encode("utf-8", errors="replace").decode("utf-8") if isinstance(v, str) else v
+
+ msg = {k: _sanitize(v) for k, v in msg.items()}
+ return msgspec.json.encode(msg, enc_hook=_make_safe_enc_hook(default))
json = structlog.processors.JSONRenderer(serializer=json_dumps)
diff --git a/shared/logging/tests/logging/test_structlog.py b/shared/logging/tests/logging/test_structlog.py
index 4755506190e..7627c1eecea 100644
--- a/shared/logging/tests/logging/test_structlog.py
+++ b/shared/logging/tests/logging/test_structlog.py
@@ -254,6 +254,73 @@ def test_json(structlog_config, get_logger, config_kwargs, log_kwargs, expected_
}
+def test_json_non_serializable_object(structlog_config):
+ """Non-serializable objects in log context fall back to str() instead of crashing."""
+
+ class BadStructlog:
+ def __structlog__(self):
+ raise TypeError("unsupported")
+
+ def __str__(self):
+ return "<BadStructlog>"
+
+ with structlog_config(json_output=True) as bio:
+ logger = structlog.get_logger("my.logger")
+ logger.info("Hello", obj=BadStructlog())
+
+ written = json.load(bio)
+ assert written["obj"] == "<BadStructlog>"
+ assert written["event"] == "Hello"
+
+
+def test_json_custom_object_uses_repr(structlog_config):
+ """Custom objects without __structlog__ serialize via repr() through the normal enc_hook path."""
+
+ class CustomObj:
+ pass
+
+ with structlog_config(json_output=True) as bio:
+ logger = structlog.get_logger("my.logger")
+ logger.info("Hello", obj=CustomObj())
+
+ written = json.load(bio)
+ assert written["event"] == "Hello"
+ assert "CustomObj" in written["obj"]
+
+
+def test_safe_enc_hook_with_none_default():
+ """When default is None, _make_safe_enc_hook falls back to str() directly."""
+ from airflow_shared.logging.structlog import _make_safe_enc_hook
+
+ hook = _make_safe_enc_hook(None)
+ assert hook(42) == "42"
+ assert hook(object()).startswith("<object object at")
+
+
+def test_safe_enc_hook_catches_value_error():
+ """ValueError (including UnicodeEncodeError) from enc_hook falls back to str()."""
+ from airflow_shared.logging.structlog import _make_safe_enc_hook
+
+ def bad_default(obj):
+ raise ValueError("surrogates not allowed")
+
+ hook = _make_safe_enc_hook(bad_default)
+ assert hook(42) == "42"
+
+
+def test_json_unicode_surrogate_in_value(structlog_config):
+ """Surrogate characters in log values don't crash JSON serialization."""
+ with structlog_config(json_output=True) as bio:
+ logger = structlog.get_logger("my.logger")
+ logger.info("Hello", text="before \udce2 after")
+
+ written = json.load(bio)
+ assert written["event"] == "Hello"
+ # Surrogates are replaced with the Unicode replacement character
+ assert "\udce2" not in written["text"]
+ assert "before" in written["text"]
+
+
@pytest.mark.parametrize(
("get_logger"),
[