This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 6e6e425150c2 [SPARK-51468][SQL] Revert "From json/xml should not 
change collations in the given schema"
6e6e425150c2 is described below

commit 6e6e425150c23236cd1f87858549bde6aa0117be
Author: Stefan Kandic <[email protected]>
AuthorDate: Tue Mar 11 17:17:29 2025 +0300

    [SPARK-51468][SQL] Revert "From json/xml should not change collations in 
the given schema"
    
    ### What changes were proposed in this pull request?
    After removing session-level collation (#49772) we can also revert the PR 
that changed the behavior of the `from_json` and `from_xml` expressions to use 
the json rather than the sql type representation under the hood (#48750).
    
    ### Why are the changes needed?
    Now that we don't have correctness problems with session-level collation, 
using `sql` instead of `json` will lead to a smaller and more efficient type 
representation.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Existing unit tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #50234 from stefankandic/revertFromJsonChange.
    
    Authored-by: Stefan Kandic <[email protected]>
    Signed-off-by: Max Gekk <[email protected]>
    (cherry picked from commit 0094f445b2396e97fbb48dfe810fcf65dfdf4828)
    Signed-off-by: Max Gekk <[email protected]>
---
 .../src/main/scala/org/apache/spark/sql/functions.scala |   4 ++--
 .../query-tests/queries/function_from_json.json         |   2 +-
 .../query-tests/queries/function_from_json.proto.bin    | Bin 940 -> 770 bytes
 .../query-tests/queries/function_from_xml.json          |   2 +-
 .../query-tests/queries/function_from_xml.proto.bin     | Bin 937 -> 767 bytes
 5 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
index 90c50cb53f5d..7dfbc9911be1 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
@@ -6901,7 +6901,7 @@ object functions {
    */
   // scalastyle:on line.size.limit
   def from_json(e: Column, schema: DataType, options: Map[String, String]): 
Column = {
-    from_json(e, lit(schema.json), options.iterator)
+    from_json(e, lit(schema.sql), options.iterator)
   }
 
   // scalastyle:off line.size.limit
@@ -7773,7 +7773,7 @@ object functions {
    */
   // scalastyle:on line.size.limit
   def from_xml(e: Column, schema: StructType, options: java.util.Map[String, 
String]): Column =
-    from_xml(e, lit(schema.json), options.asScala.iterator)
+    from_xml(e, lit(schema.sql), options.asScala.iterator)
 
   // scalastyle:off line.size.limit
   /**
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json
 
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json
index 3c359d024c24..43b1abd1d59a 100644
--- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json
+++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json
@@ -37,7 +37,7 @@
           }
         }, {
           "literal": {
-            "string": 
"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}}]}"
+            "string": "STRUCT\u003cid: BIGINT, a: INT, b: DOUBLE\u003e"
           },
           "common": {
             "origin": {
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin
 
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin
index 001e2bd46740..59ecbec22861 100644
Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin
 and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin
 differ
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json
 
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json
index 04ffe209f170..44faa65e6a0c 100644
--- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json
+++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json
@@ -37,7 +37,7 @@
           }
         }, {
           "literal": {
-            "string": 
"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}}]}"
+            "string": "STRUCT\u003cid: BIGINT, a: INT, b: DOUBLE\u003e"
           },
           "common": {
             "origin": {
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin
 
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin
index d4f149dc9f4f..c40541dc98ee 100644
Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin
 and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin
 differ


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to