ephraimbuddy commented on code in PR #63871:
URL: https://github.com/apache/airflow/pull/63871#discussion_r3200705762
##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -51,55 +52,43 @@ def is_jsonable(x):
else:
return True
- def translate_tuples_to_lists(obj: Any):
- """Recursively convert tuples to lists."""
- if isinstance(obj, tuple):
- return [translate_tuples_to_lists(item) for item in obj]
- if isinstance(obj, list):
- return [translate_tuples_to_lists(item) for item in obj]
- if isinstance(obj, dict):
- return {key: translate_tuples_to_lists(value) for key, value in
obj.items()}
- return obj
+ def serialize_object(obj):
+ if obj is None or isinstance(obj, (str, int, float, bool)):
+ return obj
- def sort_dict_recursively(obj: Any) -> Any:
- """Recursively sort dictionaries to ensure consistent ordering."""
if isinstance(obj, dict):
- return {k: sort_dict_recursively(v) for k, v in
sorted(obj.items())}
- if isinstance(obj, list):
- return [sort_dict_recursively(item) for item in obj]
- if isinstance(obj, tuple):
- return tuple(sort_dict_recursively(item) for item in obj)
- return obj
+ # Sort dictionaries recursively to ensure consistent string
representation
+ # This prevents hash inconsistencies when dict ordering varies
+ return {
+ k: serialize_object(v)
+ for k, v in sorted(obj.items(), key=lambda kv:
(type(kv[0]).__name__, repr(kv[0])))
+ }
+
+ if isinstance(obj, (list, tuple)):
+ return [serialize_object(item) for item in obj]
+
+ # Use inspect.getattr_static to bypass any custom __getattr__ /
metaclass magic
+ if callable(inspect.getattr_static(obj, "serialize", None)):
+ return serialize_object(obj.serialize())
+
+ if callable(obj):
+ # Use qualified name; default repr embeds memory addresses, which
would change the DAG hash on every parse
+ full_qualified_name = qualname(obj, True)
+ return f"<callable {full_qualified_name}>"
+
+ # Non-primitive objects without a serialize attribute are converted to
str
+ # So they don't break json.dumps downstream
+ return str(obj)
max_length = conf.getint("core", "max_templated_field_length")
- if not is_jsonable(template_field):
- try:
- serialized = template_field.serialize()
- except AttributeError:
- if callable(template_field):
- full_qualified_name = qualname(template_field, True)
- serialized = f"<callable {full_qualified_name}>"
- else:
- serialized = str(template_field)
- if len(serialized) > max_length:
- rendered = redact(serialized, name)
- return truncate_rendered_value(str(rendered), max_length)
- return serialized
- if not template_field and not isinstance(template_field, tuple):
- # Avoid unnecessary serialization steps for empty fields unless they
are tuples
- # and need to be converted to lists
- return template_field
- template_field = translate_tuples_to_lists(template_field)
- # Sort dictionaries recursively to ensure consistent string representation
- # This prevents hash inconsistencies when dict ordering varies
- if isinstance(template_field, dict):
- template_field = sort_dict_recursively(template_field)
- serialized = str(template_field)
- if len(serialized) > max_length:
+ serialized = serialize_object(template_field)
+
+ if len(str(serialized)) > max_length:
rendered = redact(serialized, name)
return truncate_rendered_value(str(rendered), max_length)
- return template_field
+
+ return serialized if is_jsonable(serialized) else str(serialized)
Review Comment:
After `serialize_object`, every leaf is already a JSON primitive — so this
`is_jsonable` check is dead except for one case: dicts with non-JSON-encodable
keys (tuple/frozenset keys), which `json.dumps` rejects. In that case the
entire dict collapses to `str()`, defeating the recursive walk and
re-introducing the repr-with-memory-addresses problem. Either drop the check
entirely (recommended — `serialize_object `should guarantee `jsonable output)`
or handle exotic keys explicitly inside `serialize_object`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]