This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 25ecde948beb [SPARK-47487][SQL] Simplify code in AnsiTypeCoercion
25ecde948beb is described below

commit 25ecde948bebf01d2cb1e160516238e1d949ffdb
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Thu Mar 21 08:54:26 2024 -0700

    [SPARK-47487][SQL] Simplify code in AnsiTypeCoercion
    
    ### What changes were proposed in this pull request?
    
    Simplify the code in `AnsiTypeCoercion.implicitCast`, to merge common code 
paths.
    
    ### Why are the changes needed?
    
    improve code readability
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    existing tests
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #45612 from cloud-fan/type-coercion.
    
    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../org/apache/spark/sql/types/DataType.scala      |  2 +-
 .../sql/catalyst/analysis/AnsiTypeCoercion.scala   | 56 ++++++----------------
 2 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
index b37924a6d353..16cf6224ce27 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -102,7 +102,7 @@ abstract class DataType extends AbstractDataType {
    */
   private[spark] def existsRecursively(f: (DataType) => Boolean): Boolean = 
f(this)
 
-  override private[sql] def defaultConcreteType: DataType = this
+  final override private[sql] def defaultConcreteType: DataType = this
 
   override private[sql] def acceptsType(other: DataType): Boolean = 
sameType(other)
 }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
index c70d6696ad06..92ea3ba1ca29 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -180,56 +180,30 @@ object AnsiTypeCoercion extends TypeCoercionBase {
       // cast the input to decimal.
       case (n: NumericType, DecimalType) => Some(DecimalType.forType(n))
 
-      // Cast null type (usually from null literals) into target types
-      // By default, the result type is `target.defaultConcreteType`. When the 
target type is
-      // `TypeCollection`, there is another branch to find the "closet 
convertible data type" below.
-      case (NullType, target) if !target.isInstanceOf[TypeCollection] =>
-        Some(target.defaultConcreteType)
-
       // If a function expects a StringType, no StringType instance should be 
implicitly cast to
       // StringType with a collation that's not accepted (aka. lockdown 
unsupported collations).
       case (_: StringType, StringType) => None
       case (_: StringType, _: StringTypeCollated) => None
 
-      // This type coercion system will allow implicit converting String type 
as other
-      // primitive types, in case of breaking too many existing Spark SQL 
queries.
-      case (StringType, a: AtomicType) =>
-        Some(a)
-
-      // If the target type is any Numeric type, convert the String type as 
Double type.
-      case (StringType, NumericType) =>
-        Some(DoubleType)
-
-      // If the target type is any Decimal type, convert the String type as 
the default
-      // Decimal type.
-      case (StringType, DecimalType) =>
-        Some(DecimalType.SYSTEM_DEFAULT)
-
-      // If the target type is any timestamp type, convert the String type as 
the default
-      // Timestamp type.
-      case (StringType, AnyTimestampType) =>
-        Some(AnyTimestampType.defaultConcreteType)
-
-      case (DateType, AnyTimestampType) =>
-        Some(AnyTimestampType.defaultConcreteType)
-
-      case (_, target: DataType) =>
-        if (Cast.canANSIStoreAssign(inType, target)) {
-          Some(target)
+      // If a function expects integral type, fractional input is not allowed.
+      case (_: FractionalType, IntegralType) => None
+
+      // Ideally the implicit cast rule should be the same as 
`Cast.canANSIStoreAssign` so that it's
+      // consistent with table insertion. To avoid breaking too many existing 
Spark SQL queries,
+      // we make the system allow implicitly converting String type as 
other primitive types.
+      case (StringType, a @ (_: AtomicType | NumericType | DecimalType | 
AnyTimestampType)) =>
+        Some(a.defaultConcreteType)
+
+      // When the target type is `TypeCollection`, there is another branch to 
find the
+      // "closest convertible data type" below.
+      case (_, target) if !target.isInstanceOf[TypeCollection] =>
+        val concreteType = target.defaultConcreteType
+        if (Cast.canANSIStoreAssign(inType, concreteType)) {
+          Some(concreteType)
         } else {
           None
         }
 
-      // "canANSIStoreAssign" doesn't account for targets extending 
StringTypeCollated, but
-      // ANSIStoreAssign is generally expected to work with StringTypes
-      case (_, st: StringTypeCollated) =>
-        if (Cast.canANSIStoreAssign(inType, st.defaultConcreteType)) {
-          Some(st.defaultConcreteType)
-        }
-        else {
-          None
-        }
-
       // When we reach here, input type is not acceptable for any types in 
this type collection,
       // try to find the first one we can implicitly cast.
       case (_, TypeCollection(types)) =>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to