wjddn279 commented on code in PR #63871:
URL: https://github.com/apache/airflow/pull/63871#discussion_r3212530026


##########
airflow-core/src/airflow/serialization/helpers.py:
##########
@@ -19,87 +19,89 @@
 from __future__ import annotations
 
 import contextlib
+import inspect
 from typing import TYPE_CHECKING, Any
 
 from airflow._shared.module_loading import qualname
 from airflow._shared.secrets_masker import redact
 from airflow._shared.template_rendering import truncate_rendered_value
 from airflow.configuration import conf
-from airflow.settings import json
 
 if TYPE_CHECKING:
     from airflow.partition_mappers.base import PartitionMapper
     from airflow.timetables.base import Timetable as CoreTimetable
 
 
-def serialize_template_field(template_field: Any, name: str) -> str | dict | 
list | int | float:
+def serialize_template_field(template_field: Any, name: str) -> str | dict | 
list | int | float | bool:
     """
     Return a serializable representation of the templated field.
 
-    If ``templated_field`` is provided via a callable then
-    return the following serialized value: ``<callable full_qualified_name>``
+    The walk has two responsibilities:
 
-    If ``templated_field`` contains a class or instance that requires recursive
-    templating, store them as strings. Otherwise simply return the field as-is.
+    1. **Make the template_field JSON-encodable** — every container is rebuilt
+       with primitive leaves (str/int/float/bool/None), tuples and sets are
+       flattened to lists, and unsupported objects fall through to ``str()``
+       so ``json.dumps`` never raises on the result.
+    2. **Keep the output deterministic across parses** — callables are replaced
+       with their qualified name (never the default ``<function ... at 0x...>``
+       repr), dicts are key-sorted, and (frozen)sets are sorted by element so
+       the same input always produces the same string.
     """
 
-    def is_jsonable(x):
-        try:
-            json.dumps(x)
-        except (TypeError, OverflowError):
-            return False
-        else:
-            return True
-
-    def translate_tuples_to_lists(obj: Any):
-        """Recursively convert tuples to lists."""
-        if isinstance(obj, tuple):
-            return [translate_tuples_to_lists(item) for item in obj]
-        if isinstance(obj, list):
-            return [translate_tuples_to_lists(item) for item in obj]
-        if isinstance(obj, dict):
-            return {key: translate_tuples_to_lists(value) for key, value in 
obj.items()}
-        return obj
+    def is_jsonable_primitive_type(k):
+        """Whether ``k`` is one of the primitive types json.dumps accepts as a 
dict key or value."""
+        return k is None or isinstance(k, (str, int, float, bool))
+
+    def serialize_dict_key(key):
+        """Normalize a dict key to a JSON-encodable primitive."""
+        serialized_key = serialize_object(key)
+        return serialized_key if is_jsonable_primitive_type(serialized_key) 
else str(serialized_key)
+
+    def serialize_object(obj):
+        """Recursively rewrite ``obj`` into a JSON-encodable, hash-stable 
structure."""
+        if is_jsonable_primitive_type(obj):
+            return obj
 
-    def sort_dict_recursively(obj: Any) -> Any:
-        """Recursively sort dictionaries to ensure consistent ordering."""
         if isinstance(obj, dict):
-            return {k: sort_dict_recursively(v) for k, v in 
sorted(obj.items())}
-        if isinstance(obj, list):
-            return [sort_dict_recursively(item) for item in obj]
-        if isinstance(obj, tuple):
-            return tuple(sort_dict_recursively(item) for item in obj)
-        return obj
+            # Sort dictionaries recursively to ensure consistent string 
representation
+            # This prevents hash inconsistencies when dict ordering varies
+            return {
+                serialize_dict_key(k): serialize_object(v)
+                for k, v in sorted(obj.items(), key=lambda kv: 
(type(kv[0]).__name__, repr(kv[0])))
+            }
+
+        if isinstance(obj, (list, tuple)):
+            return [serialize_object(item) for item in obj]
+
+        if isinstance(obj, (set, frozenset)):

Review Comment:
   Previously `set/frozenset` values were converted via str(), which leaked 
memory addresses for custom objects and destabilized the DAG hash on every 
parse. The new logic recursively serializes each element and returns a sorted 
list, producing deterministic, JSON-encodable output.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to