This is an automated email from the ASF dual-hosted git repository.
zhengruifeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2ffd11dbf178 [SPARK-56954][PYTHON] Annotate that we support MapType
for from_json
2ffd11dbf178 is described below
commit 2ffd11dbf178cab85e53963e94246f4eb3caab81
Author: Tian Gao <[email protected]>
AuthorDate: Thu May 21 10:49:14 2026 +0800
[SPARK-56954][PYTHON] Annotate that we support MapType for from_json
### What changes were proposed in this pull request?
* Annotate `from_json` to indicate that `MapType` is supported as schema input.
* Clean up the docs so they are consistent.
* Add a test for `MapType`.
### Why are the changes needed?
We already support this feature; we just need the annotation so that it is
clear to users and so that type checkers won't complain.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
CI for both lint and new test.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #55997 from gaogaotiantian/from-json-map-type.
Authored-by: Tian Gao <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/sql/connect/functions/builtin.py | 3 ++-
python/pyspark/sql/functions/builtin.py | 7 ++++---
python/pyspark/sql/tests/connect/test_connect_function.py | 2 ++
python/pyspark/sql/tests/test_functions.py | 1 +
4 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/python/pyspark/sql/connect/functions/builtin.py
b/python/pyspark/sql/connect/functions/builtin.py
index 0ea0fe65a0ff..fd88faef1047 100644
--- a/python/pyspark/sql/connect/functions/builtin.py
+++ b/python/pyspark/sql/connect/functions/builtin.py
@@ -63,6 +63,7 @@ from pyspark.sql.types import (
DataType,
StructType,
ArrayType,
+ MapType,
StringType,
)
from pyspark.sql.utils import enum_to_value as _enum_to_value
@@ -1974,7 +1975,7 @@ from_csv.__doc__ = pysparkfuncs.from_csv.__doc__
def from_json(
col: "ColumnOrName",
- schema: Union[ArrayType, StructType, Column, str],
+ schema: Union[ArrayType, StructType, MapType, Column, str],
options: Optional[Mapping[str, str]] = None,
) -> Column:
if isinstance(schema, (str, Column)):
diff --git a/python/pyspark/sql/functions/builtin.py
b/python/pyspark/sql/functions/builtin.py
index eccb7d768e88..25a2eeb96494 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -49,6 +49,7 @@ from pyspark.sql.types import (
DataType,
StringType,
StructType,
+ MapType,
NumericType,
_from_numpy_type,
)
@@ -21044,7 +21045,7 @@ def json_tuple(col: "ColumnOrName", *fields: str) ->
Column:
@_try_remote_functions
def from_json(
col: "ColumnOrName",
- schema: Union[ArrayType, StructType, Column, str],
+ schema: Union[ArrayType, StructType, MapType, Column, str],
options: Optional[Mapping[str, str]] = None,
) -> Column:
"""
@@ -21061,8 +21062,8 @@ def from_json(
----------
col : :class:`~pyspark.sql.Column` or str
a column or column name in JSON format
- schema : :class:`DataType` or str
- a StructType, ArrayType of StructType or Python string literal with a
DDL-formatted string
+ schema : :class:`StructType`, :class:`ArrayType`, :class:`MapType`, or str
+ a StructType, ArrayType of StructType, MapType, or Python string
literal with a DDL-formatted string
to use when parsing the json column
options : dict, optional
options to control parsing. accepts the same options as the json
datasource.
diff --git a/python/pyspark/sql/tests/connect/test_connect_function.py
b/python/pyspark/sql/tests/connect/test_connect_function.py
index ed653333b6c5..a2344064bb6f 100644
--- a/python/pyspark/sql/tests/connect/test_connect_function.py
+++ b/python/pyspark/sql/tests/connect/test_connect_function.py
@@ -26,6 +26,7 @@ from pyspark.sql.types import (
StructField,
ArrayType,
IntegerType,
+ MapType,
)
from pyspark.testing import assertDataFrameEqual
from pyspark.testing.pandasutils import PandasOnSparkTestUtils
@@ -1825,6 +1826,7 @@ class SparkConnectFunctionTests(ReusedMixedTestCase,
PandasOnSparkTestUtils):
"MAP<STRING,INT>",
StructType([StructField("a", IntegerType())]),
ArrayType(StructType([StructField("a", IntegerType())])),
+ MapType(StringType(), IntegerType()),
]:
self.compare_by_show(
cdf.select(CF.from_json(cdf.a, schema)),
diff --git a/python/pyspark/sql/tests/test_functions.py
b/python/pyspark/sql/tests/test_functions.py
index 978476d8e0b2..ceba0f03ae25 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -146,6 +146,7 @@ class FunctionsTestsMixin:
"ByteType", # should be imported from pyspark.sql.types
"Column", # should be imported from pyspark.sql
"DataType", # should be imported from pyspark.sql.types
+ "MapType", # should be imported from pyspark.sql.types
"NumericType", # should be imported from pyspark.sql.types
"PySparkTypeError", # should be imported from pyspark.errors
"PySparkValueError", # should be imported from pyspark.errors
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]