This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 6e6e425150c2 [SPARK-51468][SQL] Revert "From json/xml should not
change collations in the given schema"
6e6e425150c2 is described below
commit 6e6e425150c23236cd1f87858549bde6aa0117be
Author: Stefan Kandic <[email protected]>
AuthorDate: Tue Mar 11 17:17:29 2025 +0300
[SPARK-51468][SQL] Revert "From json/xml should not change collations in
the given schema"
### What changes were proposed in this pull request?
After removing session-level collation (#49772), we can also revert the PR
that changed the behavior of the `from_json` and `from_xml` expressions to use
the JSON rather than the SQL type representation under the hood (#48750).
### Why are the changes needed?
Now that we don't have correctness problems with session-level collation,
using `schema.sql` instead of `schema.json` will lead to a smaller and more
efficient type representation.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing unit tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #50234 from stefankandic/revertFromJsonChange.
Authored-by: Stefan Kandic <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
(cherry picked from commit 0094f445b2396e97fbb48dfe810fcf65dfdf4828)
Signed-off-by: Max Gekk <[email protected]>
---
.../src/main/scala/org/apache/spark/sql/functions.scala | 4 ++--
.../query-tests/queries/function_from_json.json | 2 +-
.../query-tests/queries/function_from_json.proto.bin | Bin 940 -> 770 bytes
.../query-tests/queries/function_from_xml.json | 2 +-
.../query-tests/queries/function_from_xml.proto.bin | Bin 937 -> 767 bytes
5 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
index 90c50cb53f5d..7dfbc9911be1 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
@@ -6901,7 +6901,7 @@ object functions {
*/
// scalastyle:on line.size.limit
def from_json(e: Column, schema: DataType, options: Map[String, String]):
Column = {
- from_json(e, lit(schema.json), options.iterator)
+ from_json(e, lit(schema.sql), options.iterator)
}
// scalastyle:off line.size.limit
@@ -7773,7 +7773,7 @@ object functions {
*/
// scalastyle:on line.size.limit
def from_xml(e: Column, schema: StructType, options: java.util.Map[String,
String]): Column =
- from_xml(e, lit(schema.json), options.asScala.iterator)
+ from_xml(e, lit(schema.sql), options.asScala.iterator)
// scalastyle:off line.size.limit
/**
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json
index 3c359d024c24..43b1abd1d59a 100644
---
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json
+++
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json
@@ -37,7 +37,7 @@
}
}, {
"literal": {
- "string":
"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}}]}"
+ "string": "STRUCT\u003cid: BIGINT, a: INT, b: DOUBLE\u003e"
},
"common": {
"origin": {
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin
index 001e2bd46740..59ecbec22861 100644
Binary files
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin
and
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin
differ
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json
index 04ffe209f170..44faa65e6a0c 100644
---
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json
+++
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json
@@ -37,7 +37,7 @@
}
}, {
"literal": {
- "string":
"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}}]}"
+ "string": "STRUCT\u003cid: BIGINT, a: INT, b: DOUBLE\u003e"
},
"common": {
"origin": {
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin
index d4f149dc9f4f..c40541dc98ee 100644
Binary files
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin
and
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin
differ
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]