This is an automated email from the ASF dual-hosted git repository. rusackas pushed a commit to branch fix-25125-json-data-type-preservation in repository https://gitbox.apache.org/repos/asf/superset.git
commit ffc31f03fd7674c88753fb80f18396b5e9e917f9 Author: Evan Rusackas <[email protected]> AuthorDate: Sun Feb 22 15:17:20 2026 -0800 fix(result_set): preserve JSON/JSONB data as objects instead of strings This fix ensures that JSON and JSONB data from databases (like PostgreSQL) is preserved as Python objects (dicts/lists) when converting result sets to pandas DataFrames. Previously, nested data types were being stringified, which broke features like Handlebars templates that need to access JSON data as objects rather than strings. The fix works by: 1. Tracking columns with nested/JSON data before stringification 2. Restoring the original Python objects when converting to pandas Fixes #25125 Co-Authored-By: Claude <[email protected]> --- superset/result_set.py | 18 ++++++-- tests/unit_tests/result_set_test.py | 88 +++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/superset/result_set.py b/superset/result_set.py index ff65453aff1..a6dd18705d3 100644 --- a/superset/result_set.py +++ b/superset/result_set.py @@ -113,6 +113,8 @@ class SupersetResultSet: deduped_cursor_desc: list[tuple[Any, ...]] = [] numpy_dtype: list[tuple[str, ...]] = [] stringified_arr: NDArray[Any] + # Track columns with nested/JSON data to preserve them as objects + self._nested_columns: dict[str, list[Any]] = {} if cursor_description: # get deduped list of column names @@ -154,9 +156,11 @@ class SupersetResultSet: if pa_data: # pylint: disable=too-many-nested-blocks for i, column in enumerate(column_names): if pa.types.is_nested(pa_data[i].type): - # TODO: revisit nested column serialization once nested types - # are added as a natively supported column type in Superset - # (superset.utils.core.GenericDataType). + # Preserve nested/JSON data as Python objects for use in + # templates like Handlebars. Store original values before + # stringifying for PyArrow compatibility. 
+ # See: https://github.com/apache/superset/issues/25125 + self._nested_columns[column] = array[column].tolist() stringified_arr = stringify_values(array[column]) pa_data[i] = pa.array(stringified_arr.tolist()) @@ -247,7 +251,13 @@ class SupersetResultSet: return None def to_pandas_df(self) -> pd.DataFrame: - return self.convert_table_to_df(self.table) + df = self.convert_table_to_df(self.table) + # Restore nested/JSON columns as Python objects instead of strings + # This allows JSON data to be used directly in templates like Handlebars + for column, values in self._nested_columns.items(): + if column in df.columns: + df[column] = values + return df @property def pa_table(self) -> pa.Table: diff --git a/tests/unit_tests/result_set_test.py b/tests/unit_tests/result_set_test.py index da5dcdafabc..afd8896cebf 100644 --- a/tests/unit_tests/result_set_test.py +++ b/tests/unit_tests/result_set_test.py @@ -185,3 +185,91 @@ def test_get_column_description_from_empty_data_using_cursor_description( ) assert any(col.get("column_name") == "__time" for col in result_set.columns) logger.exception.assert_not_called() + + +def test_json_data_type_preserved_as_objects() -> None: + """ + Test that JSON/JSONB data is preserved as Python objects (dicts/lists) + instead of being converted to strings. + + This is important for Handlebars templates and other features that need + to access JSON data as objects rather than strings. 
+ + See: https://github.com/apache/superset/issues/25125 + """ + # Simulate data from PostgreSQL JSONB column - psycopg2 returns dicts + data = [ + (1, {"key": "value1", "nested": {"a": 1}}, "text1"), + (2, {"key": "value2", "items": [1, 2, 3]}, "text2"), + (3, None, "text3"), + (4, {"mixed": "string"}, "text4"), + ] + description = [ + ("id", 23, None, None, None, None, None), # INT + ("json_col", 3802, None, None, None, None, None), # JSONB + ("text_col", 1043, None, None, None, None, None), # VARCHAR + ] + result_set = SupersetResultSet(data, description, BaseEngineSpec) # type: ignore + df = result_set.to_pandas_df() + + # JSON column should be preserved as Python objects, not strings + assert df["json_col"].iloc[0] == {"key": "value1", "nested": {"a": 1}} + assert isinstance(df["json_col"].iloc[0], dict) + assert df["json_col"].iloc[1] == {"key": "value2", "items": [1, 2, 3]} + assert df["json_col"].iloc[2] is None + assert df["json_col"].iloc[3] == {"mixed": "string"} + + # Verify the data can be serialized to JSON (as it would be for API response) + from superset.utils import json as superset_json + + records = df.to_dict(orient="records") + json_output = superset_json.dumps(records) + parsed = superset_json.loads(json_output) + assert parsed[0]["json_col"]["key"] == "value1" + assert parsed[0]["json_col"]["nested"]["a"] == 1 + assert parsed[1]["json_col"]["items"] == [1, 2, 3] + + +def test_json_data_with_homogeneous_structure() -> None: + """ + Test that JSON data with consistent structure is also preserved as objects. 
+ """ + # All rows have the same JSON structure + data = [ + (1, {"name": "Alice", "age": 30}), + (2, {"name": "Bob", "age": 25}), + (3, {"name": "Charlie", "age": 35}), + ] + description = [ + ("id", 23, None, None, None, None, None), + ("data", 3802, None, None, None, None, None), + ] + result_set = SupersetResultSet(data, description, BaseEngineSpec) # type: ignore + df = result_set.to_pandas_df() + + # Should be preserved as dicts + assert isinstance(df["data"].iloc[0], dict) + assert df["data"].iloc[0]["name"] == "Alice" + assert df["data"].iloc[1]["age"] == 25 + + +def test_array_data_type_preserved() -> None: + """ + Test that array data is also preserved as Python lists. + """ + data = [ + (1, [1, 2, 3]), + (2, [4, 5, 6]), + (3, None), + ] + description = [ + ("id", 23, None, None, None, None, None), + ("arr", 1007, None, None, None, None, None), # INT ARRAY + ] + result_set = SupersetResultSet(data, description, BaseEngineSpec) # type: ignore + df = result_set.to_pandas_df() + + # Arrays should be preserved as lists + assert df["arr"].iloc[0] == [1, 2, 3] + assert isinstance(df["arr"].iloc[0], list) + assert df["arr"].iloc[2] is None